summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2020-08-31 12:20:52 +0200
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2020-08-31 13:07:02 +0000
commita9c2e7190bbf1da1133d8d80f0208dc320e003cc (patch)
tree566ce7d595111d029c7bb9dd782041d4cdd5e438
parentdabae54d81d96768c355fd3e1671e48340bf906f (diff)
downloadqtwebengine-chromium-a9c2e7190bbf1da1133d8d80f0208dc320e003cc.tar.gz
[Backport] Security issue 1108639
Pull in a more recent OpenH264 sources from Chromium 85 Change-Id: Iad5293f5eb3332c35a823a5b3a76f66ecf9afa2b Reviewed-by: Michal Klocek <michal.klocek@qt.io>
-rw-r--r--chromium/third_party/openh264/BUILD.gn28
-rw-r--r--chromium/third_party/openh264/openh264_sources.gni1
-rw-r--r--chromium/third_party/openh264/src/.travis.yml6
-rw-r--r--chromium/third_party/openh264/src/Makefile9
-rw-r--r--chromium/third_party/openh264/src/RELEASES49
-rw-r--r--chromium/third_party/openh264/src/build/arch.mk20
-rwxr-xr-xchromium/third_party/openh264/src/build/mips-simd-check.sh32
-rwxr-xr-xchromium/third_party/openh264/src/build/mktargets.py43
-rw-r--r--chromium/third_party/openh264/src/build/platform-android.mk18
-rw-r--r--chromium/third_party/openh264/src/build/platform-bsd.mk3
-rw-r--r--chromium/third_party/openh264/src/build/platform-darwin.mk7
-rw-r--r--chromium/third_party/openh264/src/build/platform-linux.mk3
-rw-r--r--chromium/third_party/openh264/src/codec/api/svc/codec_api.h2
-rw-r--r--chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h19
-rw-r--r--chromium/third_party/openh264/src/codec/api/svc/codec_def.h1
-rw-r--r--chromium/third_party/openh264/src/codec/api/svc/codec_ver.h10
-rw-r--r--chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj12
-rw-r--r--chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h13
-rw-r--r--chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h2
-rw-r--r--chromium/third_party/openh264/src/codec/common/inc/copy_mb.h7
-rw-r--r--chromium/third_party/openh264/src/codec/common/inc/cpu_core.h1
-rw-r--r--chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h14
-rw-r--r--chromium/third_party/openh264/src/codec/common/inc/expand_pic.h5
-rw-r--r--chromium/third_party/openh264/src/codec/common/inc/msa_macros.h2393
-rw-r--r--chromium/third_party/openh264/src/codec/common/meson.build50
-rw-r--r--chromium/third_party/openh264/src/codec/common/mips/copy_mb_msa.c80
-rw-r--r--chromium/third_party/openh264/src/codec/common/mips/deblock_msa.c1024
-rw-r--r--chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp12
-rw-r--r--chromium/third_party/openh264/src/codec/common/src/cpu.cpp45
-rw-r--r--chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp234
-rw-r--r--chromium/third_party/openh264/src/codec/common/src/utils.cpp2
-rw-r--r--chromium/third_party/openh264/src/codec/common/targets.mk20
-rw-r--r--chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm15
-rw-r--r--chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp190
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h33
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h15
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h21
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h167
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h28
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h3
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h47
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h2
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h17
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h3
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h11
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h7
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h2
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h4
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h170
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp123
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp435
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp1831
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp204
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp615
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp68
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp159
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp708
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp305
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp796
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp73
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp382
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp311
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/meson.build29
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h55
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp1179
-rw-r--r--chromium/third_party/openh264/src/codec/decoder/targets.mk19
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h2
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp2
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp18
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp13
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp4
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp1
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp2
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/meson.build48
-rw-r--r--chromium/third_party/openh264/src/codec/encoder/targets.mk18
-rw-r--r--chromium/third_party/openh264/src/codec/meson.build5
-rw-r--r--chromium/third_party/openh264/src/codec/processing/meson.build34
-rw-r--r--chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h6
-rw-r--r--chromium/third_party/openh264/src/codec/processing/targets.mk18
-rw-r--r--chromium/third_party/openh264/src/gmpopenh264.info2
-rw-r--r--chromium/third_party/openh264/src/include/wels/meson.build2
-rw-r--r--chromium/third_party/openh264/src/meson.build96
-rw-r--r--chromium/third_party/openh264/src/meson_options.txt1
-rw-r--r--chromium/third_party/openh264/src/openh264.pc.in2
-rw-r--r--chromium/third_party/openh264/src/openh264.rc8
85 files changed, 9852 insertions, 2622 deletions
diff --git a/chromium/third_party/openh264/BUILD.gn b/chromium/third_party/openh264/BUILD.gn
index f3b9d997a82..8afb73a4dc4 100644
--- a/chromium/third_party/openh264/BUILD.gn
+++ b/chromium/third_party/openh264/BUILD.gn
@@ -5,9 +5,9 @@
import("//build/config/linux/pkg_config.gni")
import("//build/config/sanitizers/sanitizers.gni")
import("//build/shim_headers.gni")
+import("//third_party/nasm/nasm_assemble.gni")
import("//third_party/openh264/openh264_args.gni")
import("//third_party/openh264/openh264_sources.gni")
-import("//third_party/yasm/yasm_assemble.gni")
# Config shared by all openh264 targets.
config("config") {
@@ -42,7 +42,7 @@ config("config") {
}
}
-# YASM assembly is only checked to be working on Windows and Linux.
+# NASM assembly is only checked to be working on Windows and Linux.
# Mac is known to fail certain tests when building, but actual assembly
# is believed to work.
# MSAN builds are flaky with assembler. crbug.com/685168
@@ -53,19 +53,19 @@ use_assembler = (is_win || is_linux) &&
# This IF statement will make the targets visible only on specific builds,
# which will lead to failures on other platforms if accidentally invoked.
if (use_assembler) {
- yasm_defines = []
+ asm_defines = []
if (!is_component_build) {
if (is_mac || is_ios) {
- yasm_defines += [ "WELS_PRIVATE_EXTERN=:private_extern" ]
+ asm_defines += [ "WELS_PRIVATE_EXTERN=private_extern" ]
} else if (is_linux || is_android || is_fuchsia) {
- yasm_defines += [ "WELS_PRIVATE_EXTERN=:hidden" ]
+ asm_defines += [ "WELS_PRIVATE_EXTERN=hidden" ]
}
}
- yasm_assemble("openh264_common_yasm") {
+ nasm_assemble("openh264_common_asm") {
include_dirs = openh264_common_include_dirs
sources = openh264_common_sources_asm_x86
- defines = yasm_defines
+ defines = asm_defines
if (target_cpu == "x86") {
defines += [ "X86_32", "X86_32_PICASM" ]
} else { # x64
@@ -82,11 +82,11 @@ if (use_assembler) {
}
}
- yasm_assemble("openh264_processing_yasm") {
+ nasm_assemble("openh264_processing_asm") {
include_dirs = openh264_processing_include_dirs
include_dirs += [ "./src/codec/common/x86" ]
sources = openh264_processing_sources_asm_x86
- defines = yasm_defines
+ defines = asm_defines
if (target_cpu == "x86") {
defines += [ "X86_32", "X86_32_PICASM" ]
} else { # x64
@@ -103,11 +103,11 @@ if (use_assembler) {
}
}
- yasm_assemble("openh264_encoder_yasm") {
+ nasm_assemble("openh264_encoder_asm") {
include_dirs = openh264_encoder_include_dirs
include_dirs += [ "./src/codec/common/x86" ]
sources = openh264_encoder_sources_asm_x86
- defines = yasm_defines
+ defines = asm_defines
if (target_cpu == "x86") {
defines += [ "X86_32", "X86_32_PICASM" ]
} else { # x64
@@ -138,7 +138,7 @@ source_set("bundled_common") {
deps = []
if (use_assembler) {
defines = [ "X86_ASM" ]
- deps += [ ":openh264_common_yasm" ]
+ deps += [ ":openh264_common_asm" ]
}
if (is_android) {
deps += [
@@ -166,7 +166,7 @@ source_set("bundled_processing") {
]
if (use_assembler) {
defines = [ "X86_ASM" ]
- deps += [ ":openh264_processing_yasm" ]
+ deps += [ ":openh264_processing_asm" ]
}
}
@@ -192,7 +192,7 @@ source_set("bundled_encoder") {
]
if (use_assembler) {
defines = [ "X86_ASM" ]
- deps += [ ":openh264_encoder_yasm" ]
+ deps += [ ":openh264_encoder_asm" ]
}
}
diff --git a/chromium/third_party/openh264/openh264_sources.gni b/chromium/third_party/openh264/openh264_sources.gni
index 0f9b77bcc76..e714dad9ef7 100644
--- a/chromium/third_party/openh264/openh264_sources.gni
+++ b/chromium/third_party/openh264/openh264_sources.gni
@@ -4,6 +4,7 @@ openh264_common_include_dirs = [
"//third_party/openh264/src/codec/common/arm",
"//third_party/openh264/src/codec/common/inc",
"//third_party/openh264/src/codec/common/src",
+ "//third_party/openh264/src/codec/common/x86",
]
openh264_common_sources = [
diff --git a/chromium/third_party/openh264/src/.travis.yml b/chromium/third_party/openh264/src/.travis.yml
index 6e79ec14288..5eec0c291f1 100644
--- a/chromium/third_party/openh264/src/.travis.yml
+++ b/chromium/third_party/openh264/src/.travis.yml
@@ -1,5 +1,5 @@
language: cpp
-dist: trusty
+dist: xenial
compiler:
- g++
@@ -7,8 +7,8 @@ compiler:
before_install:
- sudo apt-get update -qq
- - sudo apt-get install -qq nasm g++-multilib gcc-multilib libc6-dev-i386 python3-pip unzip
- - sudo python3 -m pip install meson==0.44.1
+ - sudo apt-get install -qq nasm g++-multilib gcc-multilib libc6-dev-i386 python3-pip python3-setuptools unzip
+ - sudo python3 -m pip install meson==0.47.0
- wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
- unzip ninja-linux.zip
- export PATH=$PATH:$PWD
diff --git a/chromium/third_party/openh264/src/Makefile b/chromium/third_party/openh264/src/Makefile
index e70c966d34d..53d16ccb65e 100644
--- a/chromium/third_party/openh264/src/Makefile
+++ b/chromium/third_party/openh264/src/Makefile
@@ -34,9 +34,10 @@ GTEST_VER=release-1.8.1
CCASFLAGS=$(CFLAGS)
STATIC_LDFLAGS=-lstdc++
STRIP ?= strip
+USE_STACK_PROTECTOR = Yes
-SHAREDLIB_MAJORVERSION=5
-FULL_VERSION := 1.9.0
+SHAREDLIB_MAJORVERSION=6
+FULL_VERSION := 2.1.0
ifeq (,$(wildcard $(SRC_PATH)gmp-api))
HAVE_GMP_API=No
@@ -285,10 +286,10 @@ endif
endif
$(PROJECT_NAME).pc: $(PROJECT_NAME).pc.in
- @sed -e 's;@prefix@;$(PREFIX);' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;;' -e 's;@LIBS_PRIVATE@;$(STATIC_LDFLAGS);' < $< > $@
+ @sed -e 's;@prefix@;$(PREFIX);' -e 's;@libdir@;$(PREFIX)/lib;' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;;' -e 's;@LIBS_PRIVATE@;$(STATIC_LDFLAGS);' < $< > $@
$(PROJECT_NAME)-static.pc: $(PROJECT_NAME).pc.in
- @sed -e 's;@prefix@;$(PREFIX);' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;$(STATIC_LDFLAGS);' -e 's;@LIBS_PRIVATE@;;' < $< > $@
+ @sed -e 's;@prefix@;$(PREFIX);' -e 's;@libdir@;$(PREFIX)/lib;' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;$(STATIC_LDFLAGS);' -e 's;@LIBS_PRIVATE@;;' < $< > $@
install-headers:
mkdir -p $(DESTDIR)$(PREFIX)/include/wels
diff --git a/chromium/third_party/openh264/src/RELEASES b/chromium/third_party/openh264/src/RELEASES
index 632f1374d6a..349549db7d5 100644
--- a/chromium/third_party/openh264/src/RELEASES
+++ b/chromium/third_party/openh264/src/RELEASES
@@ -1,6 +1,24 @@
Releases
-----------
+v2.1.0
+------
+- Experimental support for multi-threaded decoding (disabled by default, and may result in random problems if enabled)
+- Assembly optimization for loongson platform
+- Update meson version to 5
+- Some minor bug fixes
+
+v2.0.0
+------
+- B-frame decoding support for Main and High Profile with two test cases
+- Add support for loongson(https://en.wikipedia.org/wiki/Loongson) platform
+- Add clang support for arm/arm64/x86 for NDK version over 17
+- Enable stack protector
+- Add some test cases
+- Avoid using C++/CX code for threads for Windows Phone/Windows Store/UWP
+- Remove extra visual studio projects for the decoder
+- Remove check for working compiler in NDK
+- Bug fixes
v1.8.0
------
@@ -180,6 +198,37 @@ Binaries
These binary releases are distributed under this license:
http://www.openh264.org/BINARY_LICENSE.txt
+v2.1.0
+http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-osx32.5.dylib.bz2
+http://ciscobinary.openh264.org/libopenh264-2.1.0-osx64.5.dylib.bz2
+http://ciscobinary.openh264.org/openh264-2.1.0-win32.dll.bz2
+http://ciscobinary.openh264.org/openh264-2.1.0-win64.dll.bz2
+
+v2.0.0
+------
+http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.sig.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-osx32.5.dylib.bz2
+http://ciscobinary.openh264.org/libopenh264-2.0.0-osx64.5.dylib.bz2
+http://ciscobinary.openh264.org/openh264-2.0.0-win32.dll.bz2
+http://ciscobinary.openh264.org/openh264-2.0.0-win64.dll.bz2
+
v1.8.0
------
http://ciscobinary.openh264.org/libopenh264-1.8.0-android19.so.bz2
diff --git a/chromium/third_party/openh264/src/build/arch.mk b/chromium/third_party/openh264/src/build/arch.mk
index 8ac3e70a5ad..c6570ed4e59 100644
--- a/chromium/third_party/openh264/src/build/arch.mk
+++ b/chromium/third_party/openh264/src/build/arch.mk
@@ -30,14 +30,26 @@ CFLAGS += -DHAVE_NEON_AARCH64
endif
endif
-#for loongson
+#for mips
ifneq ($(filter mips mips64, $(ARCH)),)
ifeq ($(USE_ASM), Yes)
+ENABLE_MMI=Yes
+ENABLE_MSA=Yes
ASM_ARCH = mips
ASMFLAGS += -I$(SRC_PATH)codec/common/mips/
-LOONGSON3A = $(shell g++ -dM -E - < /dev/null | grep '_MIPS_TUNE ' | cut -f 3 -d " ")
-ifeq ($(LOONGSON3A), "loongson3a")
-CFLAGS += -DHAVE_MMI
+#mmi
+ifeq ($(ENABLE_MMI), Yes)
+ENABLE_MMI = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) mmi)
+ifeq ($(ENABLE_MMI), Yes)
+CFLAGS += -DHAVE_MMI -march=loongson3a
+endif
+endif
+#msa
+ifeq ($(ENABLE_MSA), Yes)
+ENABLE_MSA = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) msa)
+ifeq ($(ENABLE_MSA), Yes)
+CFLAGS += -DHAVE_MSA -mmsa
+endif
endif
endif
endif
diff --git a/chromium/third_party/openh264/src/build/mips-simd-check.sh b/chromium/third_party/openh264/src/build/mips-simd-check.sh
new file mode 100755
index 00000000000..d0d72f9edd6
--- /dev/null
+++ b/chromium/third_party/openh264/src/build/mips-simd-check.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#**********************************************************************************
+# This script is used in build/arch.mk for mips to detect the simd instructions:
+# mmi, msa (maybe more in the future).
+#
+# --usage:
+# ./mips-simd-check.sh $(CC) mmi
+# or ./mips-simd-check.sh $(CC) msa
+#
+# date: 10/17/2019 Created
+#**********************************************************************************
+
+TMPC=$(mktemp tmp.XXXXXX.c)
+TMPO=$(mktemp tmp.XXXXXX.o)
+if [ $2 == "mmi" ]
+then
+ echo "void main(void){ __asm__ volatile(\"punpcklhw \$f0, \$f0, \$f0\"); }" > $TMPC
+ $1 -march=loongson3a $TMPC -o $TMPO &> /dev/null
+ if test -s $TMPO
+ then
+ echo "Yes"
+ fi
+elif [ $2 == "msa" ]
+then
+ echo "void main(void){ __asm__ volatile(\"addvi.b \$w0, \$w1, 1\"); }" > $TMPC
+ $1 -mmsa $TMPC -o $TMPO &> /dev/null
+ if test -s $TMPO
+ then
+ echo "Yes"
+ fi
+fi
+rm -f $TMPC $TMPO
diff --git a/chromium/third_party/openh264/src/build/mktargets.py b/chromium/third_party/openh264/src/build/mktargets.py
index 593280c0991..518909d3dfd 100755
--- a/chromium/third_party/openh264/src/build/mktargets.py
+++ b/chromium/third_party/openh264/src/build/mktargets.py
@@ -119,9 +119,9 @@ for file in sfiles:
armfiles.append(file)
mipsfiles = []
for file in cfiles:
- c = file.split('/')
- if 'mips' in c:
- mipsfiles.append(file)
+ c = file.split('/')
+ if 'mips' in c:
+ mipsfiles.append(file)
cfiles = [x for x in cfiles if x not in mipsfiles]
@@ -181,15 +181,34 @@ if len(arm64files) > 0:
f.write("OBJS += $(%s_OBJSARM64)\n\n"%(PREFIX))
if len(mipsfiles) > 0:
- f.write("%s_ASM_MIPS_SRCS=\\\n"%(PREFIX))
- for c in mipsfiles:
- f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
- f.write("\n")
- f.write("%s_OBJSMIPS += $(%s_ASM_MIPS_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
- f.write("ifeq ($(ASM_ARCH), mips)\n")
- f.write("%s_OBJS += $(%s_OBJSMIPS)\n"%(PREFIX,PREFIX))
- f.write("endif\n")
- f.write("OBJS += $(%s_OBJSMIPS)\n\n"%(PREFIX))
+ mmifiles = []
+ for file in mipsfiles:
+ if '_mmi' in file:
+ mmifiles.append(file)
+ f.write("%s_ASM_MIPS_MMI_SRCS=\\\n"%(PREFIX))
+ for c in mmifiles:
+ f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
+ f.write("\n")
+ f.write("%s_OBJSMIPS_MMI += $(%s_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))\n\n"%(PREFIX, PREFIX))
+ msafiles = []
+ for file in mipsfiles:
+ if '_msa' in file:
+ msafiles.append(file)
+ f.write("%s_ASM_MIPS_MSA_SRCS=\\\n"%(PREFIX))
+ for c in msafiles:
+ f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
+ f.write("\n")
+ f.write("%s_OBJSMIPS_MSA += $(%s_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
+ f.write("ifeq ($(ASM_ARCH), mips)\n")
+ f.write("ifeq ($(ENABLE_MMI), Yes)\n")
+ f.write("%s_OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX,PREFIX))
+ f.write("endif\n")
+ f.write("ifeq ($(ENABLE_MSA), Yes)\n")
+ f.write("%s_OBJS += $(%s_OBJSMIPS_MSA)\n"%(PREFIX,PREFIX))
+ f.write("endif\n")
+ f.write("endif\n")
+ f.write("OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX))
+ f.write("OBJS += $(%s_OBJSMIPS_MSA)\n\n"%(PREFIX))
f.write("OBJS += $(%s_OBJS)\n\n"%(PREFIX))
write_cpp_rule_pattern(f)
diff --git a/chromium/third_party/openh264/src/build/platform-android.mk b/chromium/third_party/openh264/src/build/platform-android.mk
index 7f50eec6350..0c442dfac13 100644
--- a/chromium/third_party/openh264/src/build/platform-android.mk
+++ b/chromium/third_party/openh264/src/build/platform-android.mk
@@ -45,10 +45,14 @@ CXX = $(TOOLCHAINPREFIX)g++
CC = $(TOOLCHAINPREFIX)gcc
AR = $(TOOLCHAINPREFIX)ar
CFLAGS += -DANDROID_NDK -fpic --sysroot=$(SYSROOT) -MMD -MP
+ifeq ($(USE_STACK_PROTECTOR), Yes)
+CFLAGS += -fstack-protector-all
+endif
CFLAGS += -isystem $(NDKROOT)/sysroot/usr/include -isystem $(NDKROOT)/sysroot/usr/include/$(TOOLCHAIN_NAME) -D__ANDROID_API__=$(NDKLEVEL)
CXXFLAGS += -fno-rtti -fno-exceptions
LDFLAGS += --sysroot=$(SYSROOT)
SHLDFLAGS = -Wl,--no-undefined -Wl,-z,relro -Wl,-z,now -Wl,-soname,lib$(PROJECT_NAME).so
+UTSHLDFLAGS = -Wl,-soname,libut.so
ifeq ($(NDK_TOOLCHAIN_VERSION), clang)
HOST_OS = $(shell uname -s | tr [A-Z] [a-z])
@@ -70,19 +74,31 @@ ifeq ($(NDK_TOOLCHAIN_VERSION), clang)
CFLAGS += -target $(TARGET_NAME)
LDFLAGS += -target $(TARGET_NAME) -gcc-toolchain $(GCC_TOOLCHAIN_PATH)
- LDFLAGS += -Wl,--exclude-libs,libgcc.a -Wl,--exclude-libs,libunwind.a
endif
+# background reading: https://android.googlesource.com/platform/ndk/+/master/docs/BuildSystemMaintainers.md#unwinding
+LDFLAGS += -Wl,--exclude-libs,libgcc.a -Wl,--exclude-libs,libunwind.a
+
+ifneq ($(findstring /,$(CXX)),$(findstring \,$(CXX)))
ifneq ($(CXX),$(wildcard $(CXX)))
ifneq ($(CXX).exe,$(wildcard $(CXX).exe))
$(error Compiler not found, bad NDKROOT or ARCH?)
endif
endif
+endif
+ifeq ($(NDK_TOOLCHAIN_VERSION), clang)
+STL_INCLUDES = \
+ -I$(NDKROOT)/sources/cxx-stl/llvm-libc++/include \
+ -I$(NDKROOT)/sources/cxx-stl/llvm-libc++abi/include
+STL_LIB = \
+ $(NDKROOT)/sources/cxx-stl/llvm-libc++/libs/$(APP_ABI)/libc++_static.a
+else
STL_INCLUDES = \
-I$(NDKROOT)/sources/cxx-stl/stlport/stlport
STL_LIB = \
$(NDKROOT)/sources/cxx-stl/stlport/libs/$(APP_ABI)/libstlport_static.a
+endif
GTEST_INCLUDES = $(STL_INCLUDES)
CODEC_UNITTEST_INCLUDES = $(STL_INCLUDES)
diff --git a/chromium/third_party/openh264/src/build/platform-bsd.mk b/chromium/third_party/openh264/src/build/platform-bsd.mk
index cade69aac6f..2e0bf2ccd60 100644
--- a/chromium/third_party/openh264/src/build/platform-bsd.mk
+++ b/chromium/third_party/openh264/src/build/platform-bsd.mk
@@ -4,6 +4,9 @@ SHAREDLIBSUFFIXFULLVER=$(SHAREDLIBSUFFIX).$(FULL_VERSION)
SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIBSUFFIX).$(SHAREDLIB_MAJORVERSION)
SHLDFLAGS = -Wl,-soname,$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER)
CFLAGS += -fPIC
+ifeq ($(USE_STACK_PROTECTOR), Yes)
+CFLAGS += -fstack-protector-all
+endif
LDFLAGS += -lpthread
STATIC_LDFLAGS += -lpthread -lm
ifeq ($(ASM_ARCH), x86)
diff --git a/chromium/third_party/openh264/src/build/platform-darwin.mk b/chromium/third_party/openh264/src/build/platform-darwin.mk
index 95947427753..6f91dafb2af 100644
--- a/chromium/third_party/openh264/src/build/platform-darwin.mk
+++ b/chromium/third_party/openh264/src/build/platform-darwin.mk
@@ -3,14 +3,17 @@ SHAREDLIB_DIR = $(PREFIX)/lib
SHAREDLIBSUFFIX = dylib
SHAREDLIBSUFFIXFULLVER=$(FULL_VERSION).$(SHAREDLIBSUFFIX)
SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIB_MAJORVERSION).$(SHAREDLIBSUFFIX)
-CURRENT_VERSION := 1.9.0
-COMPATIBILITY_VERSION := 1.9.0
+CURRENT_VERSION := 2.1.0
+COMPATIBILITY_VERSION := 2.1.0
SHLDFLAGS = -dynamiclib -twolevel_namespace -undefined dynamic_lookup \
-fno-common -headerpad_max_install_names -install_name \
$(SHAREDLIB_DIR)/$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER)
SHARED = -dynamiclib
SHARED += -current_version $(CURRENT_VERSION) -compatibility_version $(COMPATIBILITY_VERSION)
CFLAGS += -Wall -fPIC -MMD -MP
+ifeq ($(USE_STACK_PROTECTOR), Yes)
+CFLAGS += -fstack-protector-all
+endif
ifeq ($(ASM_ARCH), x86)
ASMFLAGS += -DPREFIX
ifeq ($(ARCH), x86_64)
diff --git a/chromium/third_party/openh264/src/build/platform-linux.mk b/chromium/third_party/openh264/src/build/platform-linux.mk
index 52230a2a6f3..b5c006b2325 100644
--- a/chromium/third_party/openh264/src/build/platform-linux.mk
+++ b/chromium/third_party/openh264/src/build/platform-linux.mk
@@ -4,6 +4,9 @@ SHAREDLIBSUFFIXFULLVER=$(SHAREDLIBSUFFIX).$(FULL_VERSION)
SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIBSUFFIX).$(SHAREDLIB_MAJORVERSION)
SHLDFLAGS = -Wl,-soname,$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER)
CFLAGS += -Wall -fno-strict-aliasing -fPIC -MMD -MP
+ifeq ($(USE_STACK_PROTECTOR), Yes)
+CFLAGS += -fstack-protector-all
+endif
LDFLAGS += -lpthread
STATIC_LDFLAGS += -lpthread -lm
AR_OPTS = crD $@
diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_api.h b/chromium/third_party/openh264/src/codec/api/svc/codec_api.h
index cea05329ed9..a1326c8f054 100644
--- a/chromium/third_party/openh264/src/codec/api/svc/codec_api.h
+++ b/chromium/third_party/openh264/src/codec/api/svc/codec_api.h
@@ -330,7 +330,7 @@ class ISVCEncoder {
virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) = 0;
/**
- * @brief Set option for encoder, detail option type, please refer to enumurate ENCODER_OPTION.
+ * @brief Get option for encoder; for the detailed option types, please refer to the enumeration ENCODER_OPTION.
* @param pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
* @return CM_RETURN: 0 - success; otherwise - failed;
*/
diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h b/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h
index e5ee3cc46bf..bb3c3d67b78 100644
--- a/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h
+++ b/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h
@@ -78,13 +78,14 @@ typedef enum {
/**
* Errors derived from bitstream parsing
*/
- dsErrorFree = 0x00, ///< bit stream error-free
- dsFramePending = 0x01, ///< need more throughput to generate a frame output,
- dsRefLost = 0x02, ///< layer lost at reference frame with temporal id 0
- dsBitstreamError = 0x04, ///< error bitstreams(maybe broken internal frame) the decoder cared
- dsDepLayerLost = 0x08, ///< dependented layer is ever lost
- dsNoParamSets = 0x10, ///< no parameter set NALs involved
- dsDataErrorConcealed = 0x20, ///< current data error concealed specified
+ dsErrorFree = 0x00, ///< bit stream error-free
+ dsFramePending = 0x01, ///< need more throughput to generate a frame output,
+ dsRefLost = 0x02, ///< layer lost at reference frame with temporal id 0
+ dsBitstreamError = 0x04, ///< error bitstreams(maybe broken internal frame) the decoder cared
+ dsDepLayerLost = 0x08, ///< dependented layer is ever lost
+ dsNoParamSets = 0x10, ///< no parameter set NALs involved
+ dsDataErrorConcealed = 0x20, ///< current data error concealed specified
+ dsRefListNullPtrs = 0x40, ///<ref picture list contains null ptrs within uiRefCount range
/**
* Errors derived from logic level
@@ -166,8 +167,8 @@ typedef enum {
DECODER_OPTION_LEVEL, ///< get current AU level info,only is used in GetOption
DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
DECODER_OPTION_IS_REF_PIC, ///< feedback current frame is ref pic or not
- DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
-
+ DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
+ DECODER_OPTION_NUM_OF_THREADS, ///< number of decoding threads. The maximum thread count is less than or equal to the lesser of the CPU core count and 16.
} DECODER_OPTION;
/**
diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_def.h b/chromium/third_party/openh264/src/codec/api/svc/codec_def.h
index 4f7eb9d88d0..edde5f4a2e9 100644
--- a/chromium/third_party/openh264/src/codec/api/svc/codec_def.h
+++ b/chromium/third_party/openh264/src/codec/api/svc/codec_def.h
@@ -201,6 +201,7 @@ typedef struct TagBufferInfo {
union {
SSysMEMBuffer sSystemBuffer; ///< memory info for one picture
} UsrData; ///< output buffer info
+ unsigned char* pDst[3]; //point to picture YUV data
} SBufferInfo;
diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h b/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h
index 1c366f139d6..a4e494f6b09 100644
--- a/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h
+++ b/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h
@@ -4,12 +4,12 @@
#include "codec_app_def.h"
-static const OpenH264Version g_stCodecVersion = {1, 9, 0, 1806};
-static const char* const g_strCodecVer = "OpenH264 version:1.9.0.1806";
+static const OpenH264Version g_stCodecVersion = {2, 1, 0, 2002};
+static const char* const g_strCodecVer = "OpenH264 version:2.1.0.2002";
-#define OPENH264_MAJOR (1)
-#define OPENH264_MINOR (9)
+#define OPENH264_MAJOR (2)
+#define OPENH264_MINOR (1)
#define OPENH264_REVISION (0)
-#define OPENH264_RESERVED (1806)
+#define OPENH264_RESERVED (2002)
#endif // CODEC_VER_H
diff --git a/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj b/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj
index a697fce1f95..de7f119f332 100644
--- a/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj
+++ b/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj
@@ -860,6 +860,10 @@
RelativePath="..\..\..\common\inc\wels_const_common.h"
>
</File>
+ <File
+ RelativePath="..\..\..\decoder\core\inc\wels_decoder_thread.h"
+ >
+ </File>
</Filter>
<Filter
Name="Source Files"
@@ -977,6 +981,14 @@
RelativePath="..\..\..\common\src\utils.cpp"
>
</File>
+ <File
+ RelativePath="..\..\..\common\src\WelsThreadLib.cpp"
+ >
+ </File>
+ <File
+ RelativePath="..\..\..\decoder\core\src\wels_decoder_thread.cpp"
+ >
+ </File>
</Filter>
</Files>
<Globals>
diff --git a/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h b/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h
index 494a076c031..cd26dbdd91e 100644
--- a/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h
+++ b/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h
@@ -60,6 +60,19 @@ typedef HANDLE WELS_EVENT;
#define WELS_THREAD_ROUTINE_TYPE DWORD WINAPI
#define WELS_THREAD_ROUTINE_RETURN(rc) return (DWORD)rc;
+#ifdef WINAPI_FAMILY
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define WP80
+
+#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
+#define GetSystemInfo(x) GetNativeSystemInfo(x)
+#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS)
+#define CreateSemaphore(a, b, c, d) CreateSemaphoreEx(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS)
+#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
+#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE)
+#endif
+#endif
+
#else // NON-WINDOWS
#include <stdlib.h>
diff --git a/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h b/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h
index 5baa823e2b6..69a7ae3981f 100644
--- a/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h
+++ b/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h
@@ -289,7 +289,7 @@
* backup register
*/
#define BACKUP_REG \
- double __back_temp[8]; \
+ double __attribute__((aligned(16))) __back_temp[8]; \
if (_MIPS_SIM == _ABI64) \
__asm__ volatile ( \
"gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
diff --git a/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h b/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h
index 56bef626faa..532702a9edc 100644
--- a/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h
+++ b/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h
@@ -82,6 +82,13 @@ void WelsCopy16x8NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src,
void WelsCopy16x16_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
#endif//HAVE_MMI
+
+#if defined (HAVE_MSA)
+void WelsCopy8x8_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
+void WelsCopy8x16_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
+void WelsCopy16x8_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
+void WelsCopy16x16_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
+#endif//HAVE_MSA
#if defined(__cplusplus)
}
#endif//__cplusplus
diff --git a/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h b/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h
index e5906c62b99..f25787b04a5 100644
--- a/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h
+++ b/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h
@@ -86,6 +86,7 @@
/* For loongson */
#define WELS_CPU_MMI 0x00000001 /* mmi */
+#define WELS_CPU_MSA 0x00000002 /* msa */
/*
* Interfaces for CPU core feature detection as below
diff --git a/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h b/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h
index a605a6a224f..3ec9b2e5d8c 100644
--- a/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h
+++ b/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h
@@ -91,6 +91,20 @@ void DeblockChromaLt4H_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, i
int8_t* pTC);
void WelsNonZeroCount_mmi (int8_t* pNonZeroCount);
#endif//HAVE_MMI
+
+#if defined(HAVE_MSA)
+void DeblockLumaLt4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
+void DeblockLumaEq4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockLumaLt4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
+void DeblockLumaEq4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockChromaEq4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockChromaLt4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+ int8_t* pTC);
+void DeblockChromaEq4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockChromaLt4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+ int8_t* pTC);
+void WelsNonZeroCount_msa (int8_t* pNonZeroCount);
+#endif//HAVE_MSA
#if defined(__cplusplus)
}
#endif//__cplusplus
diff --git a/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h b/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h
index cbb69b421ff..2b06d9e47fe 100644
--- a/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h
+++ b/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h
@@ -47,6 +47,7 @@ extern "C" {
#endif//__cplusplus
#define PADDING_LENGTH 32 // reference extension
+#define CHROMA_PADDING_LENGTH 16 // chroma reference extension
#if defined(X86_ASM)
void ExpandPictureLuma_sse2 (uint8_t* pDst,
@@ -89,6 +90,10 @@ typedef struct TagExpandPicFunc {
PExpandPictureFunc pfExpandChromaPicture[2];
} SExpandPicFunc;
+void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
+ const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
+void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
+ const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
void ExpandReferencingPicture (uint8_t* pData[3], int32_t iWidth, int32_t iHeight, int32_t iStride[3],
PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]);
diff --git a/chromium/third_party/openh264/src/codec/common/inc/msa_macros.h b/chromium/third_party/openh264/src/codec/common/inc/msa_macros.h
new file mode 100644
index 00000000000..2eef0e5b838
--- /dev/null
+++ b/chromium/third_party/openh264/src/codec/common/inc/msa_macros.h
@@ -0,0 +1,2393 @@
+/*
+ * Copyright © 2020 Loongson Technology Co. Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Yin Shiyou (yinshiyou-hf@loongson.cn)
+ * Gu Xiwei (guxiwei-hf@loongson.cn)
+ */
+
+/*
+ * This header file is copied from the Loongson LSOM project.
+ * The MSA macros are implemented with the MSA intrinsics from msa.h
+ * and are used to simplify MSA optimization.
+ */
+
+#ifndef _MSA_MACROS_H
+#define _MSA_MACROS_H 1
+#define MSA_MACROS_VERSION 18
+#include <msa.h>
+
+#if (__mips_isa_rev >= 6)
+ #define LH(psrc) \
+ ( { \
+ uint16_t val_lh_m = *(uint16_t *)(psrc); \
+ val_lh_m; \
+ } )
+
+ #define LW(psrc) \
+ ( { \
+ uint32_t val_lw_m = *(uint32_t *)(psrc); \
+ val_lw_m; \
+ } )
+
+ #if (__mips == 64)
+ #define LD(psrc) \
+ ( { \
+ uint64_t val_ld_m = *(uint64_t *)(psrc); \
+ val_ld_m; \
+ } )
+ #else // !(__mips == 64)
+ #define LD(psrc) \
+ ( { \
+ uint8_t *psrc_ld_m = (uint8_t *) (psrc); \
+ uint32_t val0_ld_m, val1_ld_m; \
+ uint64_t val_ld_m = 0; \
+ \
+ val0_ld_m = LW(psrc_ld_m); \
+ val1_ld_m = LW(psrc_ld_m + 4); \
+ \
+ val_ld_m = (uint64_t) (val1_ld_m); \
+ val_ld_m = (uint64_t) ((val_ld_m << 32) & 0xFFFFFFFF00000000); \
+ val_ld_m = (uint64_t) (val_ld_m | (uint64_t) val0_ld_m); \
+ \
+ val_ld_m; \
+ } )
+ #endif // (__mips == 64)
+
+ #define SH(val, pdst) *(uint16_t *)(pdst) = (val);
+ #define SW(val, pdst) *(uint32_t *)(pdst) = (val);
+ #define SD(val, pdst) *(uint64_t *)(pdst) = (val);
+
+#else // !(__mips_isa_rev >= 6)
+ #define LH(psrc) \
+ ( { \
+ uint8_t *psrc_lh_m = (uint8_t *) (psrc); \
+ uint16_t val_lh_m; \
+ \
+ __asm__ volatile ( \
+ "ulh %[val_lh_m], %[psrc_lh_m] \n\t" \
+ \
+ : [val_lh_m] "=r" (val_lh_m) \
+ : [psrc_lh_m] "m" (*psrc_lh_m) \
+ ); \
+ \
+ val_lh_m; \
+ } )
+
+ #define LW(psrc) \
+ ( { \
+ uint8_t *psrc_lw_m = (uint8_t *) (psrc); \
+ uint32_t val_lw_m; \
+ \
+ __asm__ volatile ( \
+ "ulw %[val_lw_m], %[psrc_lw_m] \n\t" \
+ \
+ : [val_lw_m] "=r" (val_lw_m) \
+ : [psrc_lw_m] "m" (*psrc_lw_m) \
+ ); \
+ \
+ val_lw_m; \
+ } )
+
+ #if (__mips == 64)
+ #define LD(psrc) \
+ ( { \
+ uint8_t *psrc_ld_m = (uint8_t *) (psrc); \
+ uint64_t val_ld_m = 0; \
+ \
+ __asm__ volatile ( \
+ "uld %[val_ld_m], %[psrc_ld_m] \n\t" \
+ \
+ : [val_ld_m] "=r" (val_ld_m) \
+ : [psrc_ld_m] "m" (*psrc_ld_m) \
+ ); \
+ \
+ val_ld_m; \
+ } )
+ #else // !(__mips == 64)
+ #define LD(psrc) \
+ ( { \
+ uint8_t *psrc_ld_m = (uint8_t *) (psrc); \
+ uint32_t val0_ld_m, val1_ld_m; \
+ uint64_t val_ld_m = 0; \
+ \
+ val0_ld_m = LW(psrc_ld_m); \
+ val1_ld_m = LW(psrc_ld_m + 4); \
+ \
+ val_ld_m = (uint64_t) (val1_ld_m); \
+ val_ld_m = (uint64_t) ((val_ld_m << 32) & 0xFFFFFFFF00000000); \
+ val_ld_m = (uint64_t) (val_ld_m | (uint64_t) val0_ld_m); \
+ \
+ val_ld_m; \
+ } )
+ #endif // (__mips == 64)
+
+ #define SH(val, pdst) \
+ { \
+ uint8_t *pdst_sh_m = (uint8_t *) (pdst); \
+ uint16_t val_sh_m = (val); \
+ \
+ __asm__ volatile ( \
+ "ush %[val_sh_m], %[pdst_sh_m] \n\t" \
+ \
+ : [pdst_sh_m] "=m" (*pdst_sh_m) \
+ : [val_sh_m] "r" (val_sh_m) \
+ ); \
+ }
+
+ #define SW(val, pdst) \
+ { \
+ uint8_t *pdst_sw_m = (uint8_t *) (pdst); \
+ uint32_t val_sw_m = (val); \
+ \
+ __asm__ volatile ( \
+ "usw %[val_sw_m], %[pdst_sw_m] \n\t" \
+ \
+ : [pdst_sw_m] "=m" (*pdst_sw_m) \
+ : [val_sw_m] "r" (val_sw_m) \
+ ); \
+ }
+
+ #define SD(val, pdst) \
+ { \
+ /* Unaligned 64-bit store done as two 32-bit SW stores: low word at pdst, high word at pdst + 4. */ \
+ uint8_t *pdst_sd_m = (uint8_t *) (pdst); \
+ /* Evaluate 'val' exactly once and widen it first, so the shift by 32 below is always done in 64 bits. */ \
+ uint64_t val_sd_m = (uint64_t) (val); \
+ uint32_t val0_sd_m = (uint32_t) (val_sd_m & 0x00000000FFFFFFFF); \
+ uint32_t val1_sd_m = (uint32_t) ((val_sd_m >> 32) & 0x00000000FFFFFFFF); \
+ SW(val0_sd_m, pdst_sd_m); \
+ SW(val1_sd_m, pdst_sd_m + 4); \
+ }
+#endif // (__mips_isa_rev >= 6)
+
+
+
+
+
+
+/* Description : Load vector elements with stride.
+ * Arguments : Inputs - psrc (source pointer to load from)
+ * - stride
+ * Outputs - out0, out1...
+ * Return Type - as per RTYPE
+ * Details : Loads elements in 'out0' from (psrc).
+ * Loads elements in 'out1' from (psrc + stride).
+ */
+#define MSA_LD_V(RTYPE, psrc, out) (out) = *((RTYPE *)(psrc));
+
+#define MSA_LD_V2(RTYPE, psrc, stride, out0, out1) \
+{ \
+ MSA_LD_V(RTYPE, (psrc), out0); \
+ MSA_LD_V(RTYPE, (psrc) + (stride), out1); \
+}
+
+#define MSA_LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \
+{ \
+ MSA_LD_V2(RTYPE, (psrc), stride, out0, out1); \
+ MSA_LD_V2(RTYPE, (psrc) + 2 * (stride) , stride, out2, out3); \
+}
+
+#define MSA_LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, \
+ out4, out5, out6, out7) \
+{ \
+ MSA_LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
+ MSA_LD_V4(RTYPE, (psrc) + 4 * (stride), stride, out4, out5, out6, out7); \
+}
+
+/* Description : Store vectors with stride.
+ * Arguments : Inputs - in0, in1... (source vector to be stored)
+ * - stride
+ * Outputs - pdst (destination pointer to store to)
+ * Details : Stores elements from 'in0' to (pdst).
+ * Stores elements from 'in1' to (pdst + stride).
+ */
+#define MSA_ST_V(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in);
+
+#define MSA_ST_V2(RTYPE, in0, in1, pdst, stride) \
+{ \
+ MSA_ST_V(RTYPE, in0, (pdst)); \
+ MSA_ST_V(RTYPE, in1, (pdst) + (stride)); \
+}
+
+#define MSA_ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \
+{ \
+ MSA_ST_V2(RTYPE, in0, in1, (pdst), stride); \
+ MSA_ST_V2(RTYPE, in2, in3, (pdst) + 2 * (stride), stride); \
+}
+
+#define MSA_ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
+{ \
+ MSA_ST_V4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
+ MSA_ST_V4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * (stride), stride); \
+}
+
+/* Description : Store half word elements of vector with stride.
+ * Arguments : Inputs - in (source vector; may be any expression, it is parenthesized before the cast)
+ * - pdst (destination pointer to store to)
+ * - stride (added to pdst with pointer arithmetic; may be any expression)
+ * Details : Stores half word 'idx0' from 'in' to (pdst).
+ * Stores half word 'idx1' from 'in' to (pdst + stride).
+ * Similar for other elements.
+ */
+#define MSA_ST_H(in, idx, pdst) \
+{ \
+ uint16_t out0_m; \
+ out0_m = __msa_copy_u_h((v8i16) (in), idx); \
+ SH(out0_m, (pdst)); \
+}
+#define MSA_ST_H2(in, idx0, idx1, pdst, stride) \
+{ \
+ uint16_t out0_m, out1_m; \
+ out0_m = __msa_copy_u_h((v8i16) (in), idx0); \
+ out1_m = __msa_copy_u_h((v8i16) (in), idx1); \
+ SH(out0_m, (pdst)); \
+ SH(out1_m, (pdst) + (stride)); \
+}
+#define MSA_ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride) \
+{ \
+ uint16_t out0_m, out1_m, out2_m, out3_m; \
+ out0_m = __msa_copy_u_h((v8i16) (in), idx0); \
+ out1_m = __msa_copy_u_h((v8i16) (in), idx1); \
+ out2_m = __msa_copy_u_h((v8i16) (in), idx2); \
+ out3_m = __msa_copy_u_h((v8i16) (in), idx3); \
+ SH(out0_m, (pdst)); \
+ SH(out1_m, (pdst) + (stride)); \
+ SH(out2_m, (pdst) + 2 * (stride)); \
+ SH(out3_m, (pdst) + 3 * (stride)); \
+}
+#define MSA_ST_H8(in, idx0, idx1, idx2, idx3, idx4, idx5, \
+ idx6, idx7, pdst, stride) \
+{ \
+ MSA_ST_H4((in), idx0, idx1, idx2, idx3, (pdst), (stride)) \
+ MSA_ST_H4((in), idx4, idx5, idx6, idx7, (pdst) + 4 * (stride), (stride)) \
+}
+
+/* Description : Store word elements of vector with stride.
+ * Arguments : Inputs - in, in0, in1 (source vectors; may be any expression, parenthesized before the cast)
+ * - pdst (destination pointer to store to)
+ * - stride (added to pdst with pointer arithmetic; may be any expression)
+ * Details : Stores word 'idx0' from 'in' to (pdst).
+ * Stores word 'idx1' from 'in' to (pdst + stride).
+ * Similar for other elements; MSA_ST_W8 takes idx0-idx3 from 'in0' and idx4-idx7 from 'in1'.
+ */
+#define MSA_ST_W(in, idx, pdst) \
+{ \
+ uint32_t out0_m; \
+ out0_m = __msa_copy_u_w((v4i32) (in), idx); \
+ SW(out0_m, (pdst)); \
+}
+#define MSA_ST_W2(in, idx0, idx1, pdst, stride) \
+{ \
+ uint32_t out0_m, out1_m; \
+ out0_m = __msa_copy_u_w((v4i32) (in), idx0); \
+ out1_m = __msa_copy_u_w((v4i32) (in), idx1); \
+ SW(out0_m, (pdst)); \
+ SW(out1_m, (pdst) + (stride)); \
+}
+#define MSA_ST_W4(in, idx0, idx1, idx2, idx3, pdst, stride) \
+{ \
+ uint32_t out0_m, out1_m, out2_m, out3_m; \
+ out0_m = __msa_copy_u_w((v4i32) (in), idx0); \
+ out1_m = __msa_copy_u_w((v4i32) (in), idx1); \
+ out2_m = __msa_copy_u_w((v4i32) (in), idx2); \
+ out3_m = __msa_copy_u_w((v4i32) (in), idx3); \
+ SW(out0_m, (pdst)); \
+ SW(out1_m, (pdst) + (stride)); \
+ SW(out2_m, (pdst) + 2 * (stride)); \
+ SW(out3_m, (pdst) + 3 * (stride)); \
+}
+#define MSA_ST_W8(in0, in1, idx0, idx1, idx2, idx3, \
+ idx4, idx5, idx6, idx7, pdst, stride) \
+{ \
+ MSA_ST_W4((in0), idx0, idx1, idx2, idx3, (pdst), (stride)) \
+ MSA_ST_W4((in1), idx4, idx5, idx6, idx7, (pdst) + 4 * (stride), (stride)) \
+}
+
+/* Description : Store double word elements of vector with stride.
+ * Arguments : Inputs - in, in0..in3 (source vectors; may be any expression, parenthesized before the cast)
+ * - pdst (destination pointer to store to)
+ * - stride (added to pdst with pointer arithmetic; may be any expression)
+ * Details : Stores double word 'idx0' from 'in' to (pdst).
+ * Stores double word 'idx1' from 'in' to (pdst + stride).
+ * Similar for other elements; MSA_ST_D4 takes idx0/idx1 from 'in0' and idx2/idx3 from 'in1'.
+ */
+#define MSA_ST_D(in, idx, pdst) \
+{ \
+ uint64_t out0_m; \
+ out0_m = __msa_copy_u_d((v2i64) (in), idx); \
+ SD(out0_m, (pdst)); \
+}
+#define MSA_ST_D2(in, idx0, idx1, pdst, stride) \
+{ \
+ uint64_t out0_m, out1_m; \
+ out0_m = __msa_copy_u_d((v2i64) (in), idx0); \
+ out1_m = __msa_copy_u_d((v2i64) (in), idx1); \
+ SD(out0_m, (pdst)); \
+ SD(out1_m, (pdst) + (stride)); \
+}
+#define MSA_ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
+{ \
+ uint64_t out0_m, out1_m, out2_m, out3_m; \
+ out0_m = __msa_copy_u_d((v2i64) (in0), idx0); \
+ out1_m = __msa_copy_u_d((v2i64) (in0), idx1); \
+ out2_m = __msa_copy_u_d((v2i64) (in1), idx2); \
+ out3_m = __msa_copy_u_d((v2i64) (in1), idx3); \
+ SD(out0_m, (pdst)); \
+ SD(out1_m, (pdst) + (stride)); \
+ SD(out2_m, (pdst) + 2 * (stride)); \
+ SD(out3_m, (pdst) + 3 * (stride)); \
+}
+#define MSA_ST_D8(in0, in1, in2, in3, idx0, idx1, idx2, idx3, \
+ idx4, idx5, idx6, idx7, pdst, stride) \
+{ \
+ MSA_ST_D4((in0), (in1), idx0, idx1, idx2, idx3, (pdst), (stride)) \
+ MSA_ST_D4((in2), (in3), idx4, idx5, idx6, idx7, (pdst) + 4 * (stride), (stride)) \
+}
+
+/* Description : Shuffle byte vector elements as per mask vector.
+ * Arguments : Inputs - in0, in1 (source vectors)
+ * - mask (mask vectors)
+ * Outputs - out (destination vector)
+ * Return Type - as per RTYPE
+ * Details : Selective byte elements from 'in0' & 'in1' are copied to 'out' as
+ * per control vector 'mask'.
+ */
+#define MSA_VSHF_B(RTYPE, in0, in1, mask, out) \
+{ \
+ out = (RTYPE) __msa_vshf_b((v16i8) mask, (v16i8) in0, (v16i8) in1); \
+}
+
+#define MSA_VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
+{ \
+ MSA_VSHF_B(RTYPE, in0, in1, mask0, out0) \
+ MSA_VSHF_B(RTYPE, in2, in3, mask1, out1) \
+}
+
+#define MSA_VSHF_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ mask0, mask1, mask2, mask3, out0, out1, out2, out3) \
+{ \
+ MSA_VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
+ MSA_VSHF_B2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \
+}
+
+/* Description : Shuffle halfword vector elements as per mask vector.
+ * Arguments : Inputs - in0, in1 (source vectors)
+ * - mask (mask vectors)
+ * Outputs - out (destination vector)
+ * Return Type - as per RTYPE
+ * Details : Selective halfword elements from 'in0' & 'in1' are copied to 'out' as
+ * per control vector 'mask'.
+ */
+#define MSA_VSHF_H(RTYPE, in0, in1, mask, out) \
+{ \
+ out = (RTYPE) __msa_vshf_h((v8i16) mask, (v8i16) in0, (v8i16) in1); \
+}
+
+#define MSA_VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
+{ \
+ MSA_VSHF_H(RTYPE, in0, in1, mask0, out0) \
+ MSA_VSHF_H(RTYPE, in2, in3, mask1, out1) \
+}
+
+#define MSA_VSHF_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ mask0, mask1, mask2, mask3, out0, out1, out2, out3) \
+{ \
+ MSA_VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
+ MSA_VSHF_H2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \
+}
+
+/* Description : Shuffle word vector elements as per mask vector.
+ * Arguments : Inputs - in0, in1 (source vectors)
+ * - mask (mask vectors)
+ * Outputs - out (destination vector)
+ * Return Type - as per RTYPE
+ * Details : Selective word elements from 'in0' & 'in1' are copied to 'out' as
+ * per control vector 'mask'.
+ */
+#define MSA_VSHF_W(RTYPE, in0, in1, mask, out) \
+{ \
+ out = (RTYPE) __msa_vshf_w((v4i32) mask, (v4i32) in0, (v4i32) in1); \
+}
+
+#define MSA_VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
+{ \
+ MSA_VSHF_W(RTYPE, in0, in1, mask0, out0) \
+ MSA_VSHF_W(RTYPE, in2, in3, mask1, out1) \
+}
+
+#define MSA_VSHF_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ mask0, mask1, mask2, mask3, out0, out1, out2, out3) \
+{ \
+ MSA_VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
+ MSA_VSHF_W2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \
+}
+
+/* Description : Interleave even byte elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Even byte elements of 'in0' and even byte
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVEV_B(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvev_b((v16i8) in0, (v16i8) in1); \
+}
+
+#define MSA_ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVEV_B(RTYPE, in0, in1, out0); \
+ MSA_ILVEV_B(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave even half word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Even half word elements of 'in0' and even half word
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVEV_H(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvev_h((v8i16) in0, (v8i16) in1); \
+}
+
+#define MSA_ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVEV_H(RTYPE, in0, in1, out0); \
+ MSA_ILVEV_H(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave even word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : 'in0' and 'in1' are reinterpreted as v4i32 (word vectors); their even
+ * word elements are interleaved and copied to 'out'.
+ */
+#define MSA_ILVEV_W(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvev_w((v4i32) in0, (v4i32) in1); \
+}
+
+#define MSA_ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVEV_W(RTYPE, in0, in1, out0); \
+ MSA_ILVEV_W(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVEV_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVEV_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave even double word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Even double word elements of 'in0' and even double word
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVEV_D(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvev_d((v2i64) in0, (v2i64) in1); \
+}
+
+#define MSA_ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVEV_D(RTYPE, in0, in1, out0); \
+ MSA_ILVEV_D(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave odd byte elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Odd byte elements of 'in0' and odd byte
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVOD_B(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvod_b((v16i8) in0, (v16i8) in1); \
+}
+
+#define MSA_ILVOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVOD_B(RTYPE, in0, in1, out0); \
+ MSA_ILVOD_B(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVOD_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVOD_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVOD_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave odd half word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Odd half word elements of 'in0' and odd half word
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVOD_H(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvod_h((v8i16) in0, (v8i16) in1); \
+}
+
+#define MSA_ILVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVOD_H(RTYPE, in0, in1, out0); \
+ MSA_ILVOD_H(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVOD_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave odd word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Odd word elements of 'in0' and odd word
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVOD_W(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvod_w((v4i32) in0, (v4i32) in1); \
+}
+
+#define MSA_ILVOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVOD_W(RTYPE, in0, in1, out0); \
+ MSA_ILVOD_W(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVOD_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVOD_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVOD_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave odd double word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Odd double word elements of 'in0' and odd double word
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVOD_D(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvod_d((v2i64) in0, (v2i64) in1); \
+}
+
+#define MSA_ILVOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVOD_D(RTYPE, in0, in1, out0); \
+ MSA_ILVOD_D(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVOD_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVOD_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVOD_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave left half of byte elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Left half of byte elements of 'in0' and left half of byte
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVL_B(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
+}
+
+#define MSA_ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVL_B(RTYPE, in0, in1, out0); \
+ MSA_ILVL_B(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave left half of halfword elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Left half of halfword elements of 'in0' and left half of halfword
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVL_H(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \
+}
+
+#define MSA_ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVL_H(RTYPE, in0, in1, out0); \
+ MSA_ILVL_H(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVL_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVL_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave left half of word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Left half of word elements of 'in0' and left half of word
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVL_W(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \
+}
+
+#define MSA_ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVL_W(RTYPE, in0, in1, out0); \
+ MSA_ILVL_W(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVL_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVL_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave left half of double word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Left half of double word elements of 'in0' and left half of
+ * double word elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVL_D(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvl_d((v2i64) in0, (v2i64) in1); \
+}
+
+#define MSA_ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVL_D(RTYPE, in0, in1, out0); \
+ MSA_ILVL_D(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVL_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVL_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave right half of byte elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Right half of byte elements of 'in0' and right half of byte
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVR_B(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
+}
+
+#define MSA_ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVR_B(RTYPE, in0, in1, out0); \
+ MSA_ILVR_B(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave right half of halfword elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Right half of halfword elements of 'in0' and right half of halfword
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVR_H(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvr_h((v8i16) in0, (v8i16) in1); \
+}
+
+#define MSA_ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVR_H(RTYPE, in0, in1, out0); \
+ MSA_ILVR_H(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave right half of word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Right half of word elements of 'in0' and right half of word
+ * elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVR_W(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvr_w((v4i32) in0, (v4i32) in1); \
+}
+
+#define MSA_ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVR_W(RTYPE, in0, in1, out0); \
+ MSA_ILVR_W(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave right half of double word elements from vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out
+ * Return Type - as per RTYPE
+ * Details : Right half of double word elements of 'in0' and right half of
+ * double word elements of 'in1' are interleaved and copied to 'out'.
+ */
+#define MSA_ILVR_D(RTYPE, in0, in1, out) \
+{ \
+ out = (RTYPE) __msa_ilvr_d((v2i64) in0, (v2i64) in1); \
+}
+
+#define MSA_ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ MSA_ILVR_D(RTYPE, in0, in1, out0); \
+ MSA_ILVR_D(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ MSA_ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Interleave both left and right half of input vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out0, out1
+ * Return Type - as per RTYPE
+ * Details : Right half of byte elements from 'in0' and 'in1' are
+ * interleaved and stored to 'out0'.
+ * Left half of byte elements from 'in0' and 'in1' are
+ * interleaved and stored to 'out1'.
+ */
+#define MSA_ILVRL_B2(RTYPE, in0, in1, out0, out1) \
+{ \
+ MSA_ILVR_B(RTYPE, in0, in1, out0); \
+ MSA_ILVL_B(RTYPE, in0, in1, out1); \
+}
+
+#define MSA_ILVRL_B4(RTYPE, in0, in1, in2, in3, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVRL_B2(RTYPE, in0, in1, out0, out1); \
+ MSA_ILVRL_B2(RTYPE, in2, in3, out2, out3); \
+}
+
+/* Description : Interleave both left and right half of input vectors.
+ * Arguments : Inputs - in0, in1
+ * Outputs - out0, out1
+ * Return Type - as per RTYPE
+ * Details : Right half of halfword elements from 'in0' and 'in1' are
+ * interleaved and stored to 'out0'.
+ * Left half of halfword elements from 'in0' and 'in1' are
+ * interleaved and stored to 'out1'.
+ */
+#define MSA_ILVRL_H2(RTYPE, in0, in1, out0, out1) \
+{ \
+ MSA_ILVR_H(RTYPE, in0, in1, out0); \
+ MSA_ILVL_H(RTYPE, in0, in1, out1); \
+}
+
+#define MSA_ILVRL_H4(RTYPE, in0, in1, in2, in3, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVRL_H2(RTYPE, in0, in1, out0, out1); \
+ MSA_ILVRL_H2(RTYPE, in2, in3, out2, out3); \
+}
+
+/* Description : Word-wise interleave of both halves of a vector pair.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out0, out1
+ *               Return Type - as per RTYPE
+ * Details     : 'out0' receives the interleaved right-half word elements of
+ *               'in0' and 'in1' (via MSA_ILVR_W).
+ *               'out1' receives the interleaved left-half word elements of
+ *               'in0' and 'in1' (via MSA_ILVL_W).
+ *               MSA_ILVRL_W4 does the same for two input pairs:
+ *               (in0, in1) -> out0, out1 and (in2, in3) -> out2, out3.
+ */
+#define MSA_ILVRL_W2(RTYPE, in0, in1, out0, out1) \
+{ \
+    MSA_ILVR_W(RTYPE, in0, in1, out0); \
+    MSA_ILVL_W(RTYPE, in0, in1, out1); \
+}
+
+#define MSA_ILVRL_W4(RTYPE, in0, in1, in2, in3, \
+                     out0, out1, out2, out3) \
+{ \
+    /* first pair */ \
+    MSA_ILVR_W(RTYPE, in0, in1, out0); \
+    MSA_ILVL_W(RTYPE, in0, in1, out1); \
+    /* second pair */ \
+    MSA_ILVR_W(RTYPE, in2, in3, out2); \
+    MSA_ILVL_W(RTYPE, in2, in3, out3); \
+}
+
+/* Description : Double-word-wise interleave of both halves of a vector pair.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out0, out1
+ *               Return Type - as per RTYPE
+ * Details     : 'out0' receives the interleaved right-half double word
+ *               elements of 'in0' and 'in1' (via MSA_ILVR_D).
+ *               'out1' receives the interleaved left-half double word
+ *               elements of 'in0' and 'in1' (via MSA_ILVL_D).
+ *               MSA_ILVRL_D4 does the same for two input pairs:
+ *               (in0, in1) -> out0, out1 and (in2, in3) -> out2, out3.
+ */
+#define MSA_ILVRL_D2(RTYPE, in0, in1, out0, out1) \
+{ \
+    MSA_ILVR_D(RTYPE, in0, in1, out0); \
+    MSA_ILVL_D(RTYPE, in0, in1, out1); \
+}
+
+#define MSA_ILVRL_D4(RTYPE, in0, in1, in2, in3, \
+                     out0, out1, out2, out3) \
+{ \
+    /* first pair */ \
+    MSA_ILVR_D(RTYPE, in0, in1, out0); \
+    MSA_ILVL_D(RTYPE, in0, in1, out1); \
+    /* second pair */ \
+    MSA_ILVR_D(RTYPE, in2, in3, out2); \
+    MSA_ILVL_D(RTYPE, in2, in3, out3); \
+}
+
+/* Description : Indexed byte elements are replicated to all elements in
+ *               output vector.
+ * Arguments   : Inputs  - in, idx
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : 'idx' element value from 'in' vector is replicated to all
+ *               elements in 'out' vector.
+ *               Valid index range for byte operation is 0-15.
+ *               The 2/4 variants splat two/four different indices of the
+ *               same 'in' vector into separate outputs.
+ * Note        : The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_SPLATI_B(RTYPE, in, idx, out) \
+{ \
+    out = (RTYPE) __msa_splati_b((v16i8) (in), idx); \
+}
+
+#define MSA_SPLATI_B2(RTYPE, in, idx0, idx1, out0, out1) \
+{ \
+    MSA_SPLATI_B(RTYPE, in, idx0, out0); \
+    MSA_SPLATI_B(RTYPE, in, idx1, out1); \
+}
+
+#define MSA_SPLATI_B4(RTYPE, in, idx0, idx1, idx2, idx3, \
+                      out0, out1, out2, out3) \
+{ \
+    MSA_SPLATI_B2(RTYPE, in, idx0, idx1, out0, out1); \
+    MSA_SPLATI_B2(RTYPE, in, idx2, idx3, out2, out3); \
+}
+
+/* Description : Indexed halfword elements are replicated to all elements in
+ *               output vector.
+ * Arguments   : Inputs  - in, idx
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : 'idx' element value from 'in' vector is replicated to all
+ *               elements in 'out' vector.
+ *               Valid index range for halfword operation is 0-7.
+ *               The 2/4 variants splat two/four different indices of the
+ *               same 'in' vector into separate outputs.
+ * Note        : The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_SPLATI_H(RTYPE, in, idx, out) \
+{ \
+    out = (RTYPE) __msa_splati_h((v8i16) (in), idx); \
+}
+
+#define MSA_SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \
+{ \
+    MSA_SPLATI_H(RTYPE, in, idx0, out0); \
+    MSA_SPLATI_H(RTYPE, in, idx1, out1); \
+}
+
+#define MSA_SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, \
+                      out0, out1, out2, out3) \
+{ \
+    MSA_SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \
+    MSA_SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3); \
+}
+
+/* Description : Indexed word elements are replicated to all elements in
+ *               output vector.
+ * Arguments   : Inputs  - in, idx
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : 'idx' element value from 'in' vector is replicated to all
+ *               elements in 'out' vector.
+ *               Valid index range for word operation is 0-3.
+ *               The 2/4 variants splat two/four different indices of the
+ *               same 'in' vector into separate outputs.
+ * Note        : The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_SPLATI_W(RTYPE, in, idx, out) \
+{ \
+    out = (RTYPE) __msa_splati_w((v4i32) (in), idx); \
+}
+
+#define MSA_SPLATI_W2(RTYPE, in, idx0, idx1, out0, out1) \
+{ \
+    MSA_SPLATI_W(RTYPE, in, idx0, out0); \
+    MSA_SPLATI_W(RTYPE, in, idx1, out1); \
+}
+
+#define MSA_SPLATI_W4(RTYPE, in, idx0, idx1, idx2, idx3, \
+                      out0, out1, out2, out3) \
+{ \
+    MSA_SPLATI_W2(RTYPE, in, idx0, idx1, out0, out1); \
+    MSA_SPLATI_W2(RTYPE, in, idx2, idx3, out2, out3); \
+}
+
+/* Description : Pack even byte elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Even byte elements of 'in0' are copied to the left half of
+ *               'out' & even byte elements of 'in1' are copied to the right
+ *               half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKEV_B(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckev_b((v16i8) (in0), (v16i8) (in1)); \
+}
+
+#define MSA_PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKEV_B(RTYPE, in0, in1, out0); \
+    MSA_PCKEV_B(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Pack even halfword elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Even halfword elements of 'in0' are copied to the left half
+ *               of 'out' & even halfword elements of 'in1' are copied to the
+ *               right half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKEV_H(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckev_h((v8i16) (in0), (v8i16) (in1)); \
+}
+
+#define MSA_PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKEV_H(RTYPE, in0, in1, out0); \
+    MSA_PCKEV_H(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Pack even word elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Even word elements of 'in0' are copied to the left half of
+ *               'out' & even word elements of 'in1' are copied to the right
+ *               half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKEV_W(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckev_w((v4i32) (in0), (v4i32) (in1)); \
+}
+
+#define MSA_PCKEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKEV_W(RTYPE, in0, in1, out0); \
+    MSA_PCKEV_W(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKEV_W4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKEV_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKEV_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Pack even double word elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Even double word elements of 'in0' are copied to the left
+ *               half of 'out' & even double word elements of 'in1' are
+ *               copied to the right half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKEV_D(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckev_d((v2i64) (in0), (v2i64) (in1)); \
+}
+
+#define MSA_PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKEV_D(RTYPE, in0, in1, out0); \
+    MSA_PCKEV_D(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Pack odd byte elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Odd byte elements of 'in0' are copied to the left half of
+ *               'out' & odd byte elements of 'in1' are copied to the right
+ *               half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKOD_B(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckod_b((v16i8) (in0), (v16i8) (in1)); \
+}
+
+#define MSA_PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKOD_B(RTYPE, in0, in1, out0); \
+    MSA_PCKOD_B(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKOD_B4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKOD_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Pack odd halfword elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Odd halfword elements of 'in0' are copied to the left half
+ *               of 'out' & odd halfword elements of 'in1' are copied to the
+ *               right half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKOD_H(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckod_h((v8i16) (in0), (v8i16) (in1)); \
+}
+
+#define MSA_PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKOD_H(RTYPE, in0, in1, out0); \
+    MSA_PCKOD_H(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKOD_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Pack odd word elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Odd word elements of 'in0' are copied to the left half of
+ *               'out' & odd word elements of 'in1' are copied to the right
+ *               half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKOD_W(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckod_w((v4i32) (in0), (v4i32) (in1)); \
+}
+
+#define MSA_PCKOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKOD_W(RTYPE, in0, in1, out0); \
+    MSA_PCKOD_W(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKOD_W4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKOD_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKOD_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Pack odd double word elements of vector pairs.
+ * Arguments   : Inputs  - in0, in1
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Odd double word elements of 'in0' are copied to the left
+ *               half of 'out' & odd double word elements of 'in1' are
+ *               copied to the right half of 'out'.
+ *               The 2/4 variants pack two/four independent input pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_PCKOD_D(RTYPE, in0, in1, out) \
+{ \
+    out = (RTYPE) __msa_pckod_d((v2i64) (in0), (v2i64) (in1)); \
+}
+
+#define MSA_PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+    MSA_PCKOD_D(RTYPE, in0, in1, out0); \
+    MSA_PCKOD_D(RTYPE, in2, in3, out1); \
+}
+
+#define MSA_PCKOD_D4(RTYPE, in0, in1, in2, in3, in4, in5, \
+                     in6, in7, out0, out1, out2, out3) \
+{ \
+    MSA_PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+    MSA_PCKOD_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Dot product of unsigned byte vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Unsigned byte elements from 'mult' are multiplied with
+ *               unsigned byte elements from 'cnst' producing a result
+ *               twice the size of input i.e. unsigned halfword.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added together and stored to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst) pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DOTP_UB(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dotp_u_h((v16u8) (mult), (v16u8) (cnst)); \
+}
+
+#define MSA_DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DOTP_UB(RTYPE, mult0, cnst0, out0); \
+    MSA_DOTP_UB(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, \
+                     cnst0, cnst1, cnst2, cnst3, \
+                     out0, out1, out2, out3) \
+{ \
+    MSA_DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Dot product of signed byte vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Signed byte elements from 'mult' are multiplied with
+ *               signed byte elements from 'cnst' producing a result
+ *               twice the size of input i.e. signed halfword.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added together and stored to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst) pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DOTP_SB(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dotp_s_h((v16i8) (mult), (v16i8) (cnst)); \
+}
+
+#define MSA_DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DOTP_SB(RTYPE, mult0, cnst0, out0); \
+    MSA_DOTP_SB(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, \
+                     cnst0, cnst1, cnst2, cnst3, \
+                     out0, out1, out2, out3) \
+{ \
+    MSA_DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Dot product of unsigned halfword vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Unsigned halfword elements from 'mult' are multiplied with
+ *               unsigned halfword elements from 'cnst' producing a result
+ *               twice the size of input i.e. unsigned word.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added together and stored to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst) pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DOTP_UH(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dotp_u_w((v8u16) (mult), (v8u16) (cnst)); \
+}
+
+#define MSA_DOTP_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DOTP_UH(RTYPE, mult0, cnst0, out0); \
+    MSA_DOTP_UH(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DOTP_UH4(RTYPE, mult0, mult1, mult2, mult3, \
+                     cnst0, cnst1, cnst2, cnst3, \
+                     out0, out1, out2, out3) \
+{ \
+    MSA_DOTP_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DOTP_UH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Dot product of signed halfword vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Signed halfword elements from 'mult' are multiplied with
+ *               signed halfword elements from 'cnst' producing a result
+ *               twice the size of input i.e. signed word.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added together and stored to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst) pairs.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DOTP_SH(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dotp_s_w((v8i16) (mult), (v8i16) (cnst)); \
+}
+
+#define MSA_DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DOTP_SH(RTYPE, mult0, cnst0, out0); \
+    MSA_DOTP_SH(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, \
+                     cnst0, cnst1, cnst2, cnst3, \
+                     out0, out1, out2, out3) \
+{ \
+    MSA_DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Dot product & addition of unsigned byte vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *                         out (current accumulator value)
+ *               Outputs - out (updated in place)
+ *               Return Type - as per RTYPE
+ * Details     : Unsigned byte elements from 'mult' are multiplied with
+ *               unsigned byte elements from 'cnst' producing a result
+ *               twice the size of input i.e. unsigned halfword.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst, out) sets.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DPADD_UB(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dpadd_u_h((v8u16) (out), \
+                                  (v16u8) (mult), (v16u8) (cnst)); \
+}
+
+#define MSA_DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DPADD_UB(RTYPE, mult0, cnst0, out0); \
+    MSA_DPADD_UB(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DPADD_UB4(RTYPE, mult0, mult1, mult2, mult3, \
+                      cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \
+{ \
+    MSA_DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DPADD_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Dot product & addition of signed byte vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *                         out (current accumulator value)
+ *               Outputs - out (updated in place)
+ *               Return Type - as per RTYPE
+ * Details     : Signed byte elements from 'mult' are multiplied with
+ *               signed byte elements from 'cnst' producing a result
+ *               twice the size of input i.e. signed halfword.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst, out) sets.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DPADD_SB(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dpadd_s_h((v8i16) (out), \
+                                  (v16i8) (mult), (v16i8) (cnst)); \
+}
+
+#define MSA_DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DPADD_SB(RTYPE, mult0, cnst0, out0); \
+    MSA_DPADD_SB(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, \
+                      cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \
+{ \
+    MSA_DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Dot product & addition of unsigned halfword vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *                         out (current accumulator value)
+ *               Outputs - out (updated in place)
+ *               Return Type - as per RTYPE
+ * Details     : Unsigned halfword elements from 'mult' are multiplied with
+ *               unsigned halfword elements from 'cnst' producing a result
+ *               twice the size of input i.e. unsigned word.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst, out) sets.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DPADD_UH(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dpadd_u_w((v4u32) (out), \
+                                  (v8u16) (mult), (v8u16) (cnst)); \
+}
+
+#define MSA_DPADD_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DPADD_UH(RTYPE, mult0, cnst0, out0); \
+    MSA_DPADD_UH(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DPADD_UH4(RTYPE, mult0, mult1, mult2, mult3, \
+                      cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \
+{ \
+    MSA_DPADD_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DPADD_UH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Dot product & addition of signed halfword vector elements.
+ * Arguments   : Inputs  - mult
+ *                         cnst
+ *                         out (current accumulator value)
+ *               Outputs - out (updated in place)
+ *               Return Type - as per RTYPE
+ * Details     : Signed halfword elements from 'mult' are multiplied with
+ *               signed halfword elements from 'cnst' producing a result
+ *               twice the size of input i.e. signed word.
+ *               Then this multiplication results of adjacent odd-even
+ *               elements are added to the out vector.
+ *               The 2/4 variants process two/four (mult, cnst, out) sets.
+ * Note        : Cast operands are parenthesized so that expression
+ *               arguments are converted as a whole.
+ */
+#define MSA_DPADD_SH(RTYPE, mult, cnst, out) \
+{ \
+    out = (RTYPE) __msa_dpadd_s_w((v4i32) (out), \
+                                  (v8i16) (mult), (v8i16) (cnst)); \
+}
+
+#define MSA_DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+    MSA_DPADD_SH(RTYPE, mult0, cnst0, out0); \
+    MSA_DPADD_SH(RTYPE, mult1, cnst1, out1); \
+}
+
+#define MSA_DPADD_SH4(RTYPE, mult0, mult1, mult2, mult3, \
+                      cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \
+{ \
+    MSA_DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+    MSA_DPADD_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+
+/* Description : Clip all signed halfword elements of input vector between
+ *               min & max.
+ *               out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in)).
+ * Arguments   : Inputs  - in  (input vector)
+ *                       - min (min threshold)
+ *                       - max (max threshold)
+ *               Outputs - in  (output vector with clipped elements)
+ * Note        : type of 'in' must be v8i16.
+ *               'in' is evaluated more than once and is assigned to, so it
+ *               must be a side-effect-free lvalue.
+ *               Cast operands are parenthesized so that expression
+ *               arguments for 'min'/'max' are converted as a whole.
+ */
+#define MSA_CLIP_SH(in, min, max) \
+{ \
+    /* raise every element to at least 'min' ... */ \
+    in = __msa_max_s_h((v8i16) (min), (v8i16) (in)); \
+    /* ... then lower every element to at most 'max' */ \
+    in = __msa_min_s_h((v8i16) (max), (v8i16) (in)); \
+}
+
+/* Description : Clip all signed halfword elements of input vector between
+ *               0 & 255.
+ * Arguments   : Inputs  - in (input vector)
+ *               Outputs - in (output vector with clipped elements)
+ * Details     : Negative elements are first raised to 0 (signed max with
+ *               immediate 0); the result is then saturated as unsigned with
+ *               saturation argument 7.
+ *               The 2/4/8 variants clip two/four/eight vectors in place.
+ * Note        : type of 'in' must be v8i16.
+ *               Cast operands are parenthesized for macro hygiene.
+ */
+#define MSA_CLIP_SH_0_255(in) \
+{ \
+    in = __msa_maxi_s_h((v8i16) (in), 0); \
+    in = (v8i16) __msa_sat_u_h((v8u16) (in), 7); \
+}
+
+#define MSA_CLIP_SH2_0_255(in0, in1) \
+{ \
+    MSA_CLIP_SH_0_255(in0); \
+    MSA_CLIP_SH_0_255(in1); \
+}
+
+#define MSA_CLIP_SH4_0_255(in0, in1, in2, in3) \
+{ \
+    MSA_CLIP_SH2_0_255(in0, in1); \
+    MSA_CLIP_SH2_0_255(in2, in3); \
+}
+
+#define MSA_CLIP_SH8_0_255(in0, in1, in2, in3, \
+                           in4, in5, in6, in7) \
+{ \
+    MSA_CLIP_SH4_0_255(in0, in1, in2, in3); \
+    MSA_CLIP_SH4_0_255(in4, in5, in6, in7); \
+}
+
+/* Description : Clip all signed word elements of input vector between
+ *               0 & 255.
+ * Arguments   : Inputs  - in (input vector)
+ *               Outputs - in (output vector with clipped elements)
+ * Details     : Negative elements are first raised to 0 (signed max with
+ *               immediate 0); the result is then saturated as unsigned with
+ *               saturation argument 7.
+ *               The 2/4/8 variants clip two/four/eight vectors in place.
+ * Note        : type of 'in' must be v4i32.
+ *               Cast operands are parenthesized for macro hygiene.
+ */
+#define MSA_CLIP_SW_0_255(in) \
+{ \
+    in = __msa_maxi_s_w((v4i32) (in), 0); \
+    in = (v4i32) __msa_sat_u_w((v4u32) (in), 7); \
+}
+
+#define MSA_CLIP_SW2_0_255(in0, in1) \
+{ \
+    MSA_CLIP_SW_0_255(in0); \
+    MSA_CLIP_SW_0_255(in1); \
+}
+
+#define MSA_CLIP_SW4_0_255(in0, in1, in2, in3) \
+{ \
+    MSA_CLIP_SW2_0_255(in0, in1); \
+    MSA_CLIP_SW2_0_255(in2, in3); \
+}
+
+#define MSA_CLIP_SW8_0_255(in0, in1, in2, in3, \
+                           in4, in5, in6, in7) \
+{ \
+    MSA_CLIP_SW4_0_255(in0, in1, in2, in3); \
+    MSA_CLIP_SW4_0_255(in4, in5, in6, in7); \
+}
+
+/* Description : Addition of 16 unsigned byte elements.
+ *               16 unsigned byte elements of input vector are added
+ *               together and resulted integer sum is returned.
+ * Arguments   : Inputs  - in    (unsigned byte vector)
+ *               Outputs - sum_m (u32 sum)
+ *               Return Type - unsigned word
+ * Details     : The vector is reduced with successive widening horizontal
+ *               adds (byte -> halfword -> word -> double word); the two
+ *               remaining double words are then added and word 0 of the
+ *               result is copied to 'sum_m'.
+ * Note        : The macro declares block-local temporaries named res_m,
+ *               res0_m, res1_m and res2_m; do not pass an 'in' expression
+ *               that uses those names.
+ *               The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_HADD_UB_U32(in, sum_m) \
+{ \
+    v8u16 res_m; \
+    v4u32 res0_m; \
+    v2u64 res1_m, res2_m; \
+    \
+    res_m  = __msa_hadd_u_h((v16u8) (in), (v16u8) (in)); \
+    res0_m = __msa_hadd_u_w(res_m, res_m); \
+    res1_m = __msa_hadd_u_d(res0_m, res0_m); \
+    /* broadcast double word 1 so it can be added onto double word 0 */ \
+    res2_m = (v2u64) __msa_splati_d((v2i64) res1_m, 1); \
+    res1_m += res2_m; \
+    sum_m = __msa_copy_u_w((v4i32) res1_m, 0); \
+}
+
+/* Description : Addition of 8 unsigned halfword elements.
+ *               8 unsigned halfword elements of input vector are added
+ *               together and resulted integer sum is returned.
+ * Arguments   : Inputs  - in    (unsigned halfword vector)
+ *               Outputs - sum_m (u32 sum)
+ *               Return Type - unsigned word
+ * Details     : The vector is reduced with successive widening horizontal
+ *               adds (halfword -> word -> double word); the two remaining
+ *               double words are then added and word 0 of the result is
+ *               copied to 'sum_m'.
+ * Note        : The macro declares block-local temporaries named res_m,
+ *               res0_m and res1_m; do not pass an 'in' expression that
+ *               uses those names.
+ *               The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_HADD_UH_U32(in, sum_m) \
+{ \
+    v4u32 res_m; \
+    v2u64 res0_m, res1_m; \
+    \
+    res_m  = __msa_hadd_u_w((v8u16) (in), (v8u16) (in)); \
+    res0_m = __msa_hadd_u_d(res_m, res_m); \
+    /* broadcast double word 1 so it can be added onto double word 0 */ \
+    res1_m = (v2u64) __msa_splati_d((v2i64) res0_m, 1); \
+    res0_m += res1_m; \
+    sum_m = __msa_copy_u_w((v4i32) res0_m, 0); \
+}
+
+/* Description : Addition of 4 unsigned word elements.
+ *               4 unsigned word elements of input vector are added together
+ *               and resulted integer sum is returned.
+ * Arguments   : Inputs  - in    (unsigned word vector)
+ *               Outputs - sum_m (u32 sum)
+ *               Return Type - unsigned word
+ * Details     : A widening horizontal add (word -> double word) is followed
+ *               by adding the two double words; word 0 of the result is
+ *               copied to 'sum_m'.
+ * Note        : The macro declares block-local temporaries named res0_m and
+ *               res1_m; do not pass an 'in' expression that uses those
+ *               names.
+ *               The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_HADD_UW_U32(in, sum_m) \
+{ \
+    v2u64 res0_m, res1_m; \
+    \
+    res0_m = __msa_hadd_u_d((v4u32) (in), (v4u32) (in)); \
+    /* broadcast double word 1 so it can be added onto double word 0 */ \
+    res1_m = (v2u64) __msa_splati_d((v2i64) res0_m, 1); \
+    res0_m += res1_m; \
+    sum_m = __msa_copy_u_w((v4i32) res0_m, 0); \
+}
+
+/* Description : Addition of 16 signed byte elements.
+ *               16 signed byte elements of input vector are added
+ *               together and resulted integer sum is returned.
+ * Arguments   : Inputs  - in    (signed byte vector)
+ *               Outputs - sum_m (i32 sum)
+ *               Return Type - signed word
+ * Details     : The vector is reduced with successive widening horizontal
+ *               adds (byte -> halfword -> word -> double word); the two
+ *               remaining double words are then added and word 0 of the
+ *               result is copied to 'sum_m'.
+ * Note        : The macro declares block-local temporaries named res_m,
+ *               res0_m, res1_m and res2_m; do not pass an 'in' expression
+ *               that uses those names.
+ *               The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_HADD_SB_S32(in, sum_m) \
+{ \
+    v8i16 res_m; \
+    v4i32 res0_m; \
+    v2i64 res1_m, res2_m; \
+    \
+    res_m  = __msa_hadd_s_h((v16i8) (in), (v16i8) (in)); \
+    res0_m = __msa_hadd_s_w(res_m, res_m); \
+    res1_m = __msa_hadd_s_d(res0_m, res0_m); \
+    /* broadcast double word 1 so it can be added onto double word 0 */ \
+    res2_m = __msa_splati_d(res1_m, 1); \
+    res1_m += res2_m; \
+    sum_m = __msa_copy_s_w((v4i32) res1_m, 0); \
+}
+
+/* Description : Addition of 8 signed halfword elements.
+ *               8 signed halfword elements of input vector are added
+ *               together and resulted integer sum is returned.
+ * Arguments   : Inputs  - in    (signed halfword vector)
+ *               Outputs - sum_m (i32 sum)
+ *               Return Type - signed word
+ * Details     : The vector is reduced with successive widening horizontal
+ *               adds (halfword -> word -> double word); the two remaining
+ *               double words are then added and word 0 of the result is
+ *               copied to 'sum_m'.
+ * Note        : The macro declares block-local temporaries named res_m,
+ *               res0_m and res1_m; do not pass an 'in' expression that
+ *               uses those names.
+ *               The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_HADD_SH_S32(in, sum_m) \
+{ \
+    v4i32 res_m; \
+    v2i64 res0_m, res1_m; \
+    \
+    res_m  = __msa_hadd_s_w((v8i16) (in), (v8i16) (in)); \
+    res0_m = __msa_hadd_s_d(res_m, res_m); \
+    /* broadcast double word 1 so it can be added onto double word 0 */ \
+    res1_m = __msa_splati_d(res0_m, 1); \
+    res0_m += res1_m; \
+    sum_m = __msa_copy_s_w((v4i32) res0_m, 0); \
+}
+
+/* Description : Addition of 4 signed word elements.
+ *               4 signed word elements of input vector are added together
+ *               and resulted integer sum is returned.
+ * Arguments   : Inputs  - in    (signed word vector)
+ *               Outputs - sum_m (i32 sum)
+ *               Return Type - signed word
+ * Details     : A widening horizontal add (word -> double word) is followed
+ *               by adding the two double words; word 0 of the result is
+ *               copied to 'sum_m'.
+ * Note        : The macro declares block-local temporaries named res0_m and
+ *               res1_m; do not pass an 'in' expression that uses those
+ *               names.
+ *               The cast operand is parenthesized so that an expression
+ *               passed as 'in' is converted as a whole.
+ */
+#define MSA_HADD_SW_S32(in, sum_m) \
+{ \
+    v2i64 res0_m, res1_m; \
+    \
+    res0_m = __msa_hadd_s_d((v4i32) (in), (v4i32) (in)); \
+    /* broadcast double word 1 so it can be added onto double word 0 */ \
+    res1_m = __msa_splati_d(res0_m, 1); \
+    res0_m += res1_m; \
+    sum_m = __msa_copy_s_w((v4i32) res0_m, 0); \
+}
+
+/* Description : Saturate unsigned halfword elements to the largest unsigned
+ *               value representable in (sat_val+1) bits.
+ *               The element data width remains unchanged.
+ * Arguments   : Inputs  - in, sat_val
+ *               Outputs - in (in place)
+ *               Return Type - v8u16
+ * Details     : Every unsigned halfword element of 'in' is saturated to the
+ *               (sat_val+1)-bit range and the result is written back in
+ *               place to the same vector.
+ *               The 2/4 variants saturate two/four vectors with the same
+ *               'sat_val'.
+ */
+#define MSA_SAT_UH(in, sat_val) \
+{ \
+    in = __msa_sat_u_h(in, sat_val); \
+}
+
+#define MSA_SAT_UH2(in0, in1, sat_val) \
+{ \
+    MSA_SAT_UH(in0, sat_val); \
+    MSA_SAT_UH(in1, sat_val); \
+}
+
+#define MSA_SAT_UH4(in0, in1, in2, in3, sat_val) \
+{ \
+    MSA_SAT_UH(in0, sat_val); \
+    MSA_SAT_UH(in1, sat_val); \
+    MSA_SAT_UH(in2, sat_val); \
+    MSA_SAT_UH(in3, sat_val); \
+}
+
+/* Description : Saturate signed halfword elements to the largest signed
+ *               value representable in (sat_val+1) bits.
+ *               The element data width remains unchanged.
+ * Arguments   : Inputs  - in, sat_val
+ *               Outputs - in (in place)
+ *               Return Type - v8i16
+ * Details     : Every signed halfword element of 'in' is saturated to the
+ *               (sat_val+1)-bit range and the result is written back in
+ *               place to the same vector.
+ *               The 2/4 variants saturate two/four vectors with the same
+ *               'sat_val'.
+ */
+#define MSA_SAT_SH(in, sat_val) \
+{ \
+    in = __msa_sat_s_h(in, sat_val); \
+}
+
+#define MSA_SAT_SH2(in0, in1, sat_val) \
+{ \
+    MSA_SAT_SH(in0, sat_val); \
+    MSA_SAT_SH(in1, sat_val); \
+}
+
+#define MSA_SAT_SH4(in0, in1, in2, in3, sat_val) \
+{ \
+    MSA_SAT_SH(in0, sat_val); \
+    MSA_SAT_SH(in1, sat_val); \
+    MSA_SAT_SH(in2, sat_val); \
+    MSA_SAT_SH(in3, sat_val); \
+}
+
+/* Description : Saturate unsigned word elements to the largest unsigned
+ *               value representable in (sat_val+1) bits.
+ *               The element data width remains unchanged.
+ * Arguments   : Inputs  - in, sat_val
+ *               Outputs - in (in place)
+ *               Return Type - v4u32
+ * Details     : Every unsigned word element of 'in' is saturated to the
+ *               (sat_val+1)-bit range and the result is written back in
+ *               place to the same vector.
+ *               The 2/4 variants saturate two/four vectors with the same
+ *               'sat_val'.
+ */
+#define MSA_SAT_UW(in, sat_val) \
+{ \
+    in = __msa_sat_u_w(in, sat_val); \
+}
+
+#define MSA_SAT_UW2(in0, in1, sat_val) \
+{ \
+    MSA_SAT_UW(in0, sat_val); \
+    MSA_SAT_UW(in1, sat_val); \
+}
+
+#define MSA_SAT_UW4(in0, in1, in2, in3, sat_val) \
+{ \
+    MSA_SAT_UW(in0, sat_val); \
+    MSA_SAT_UW(in1, sat_val); \
+    MSA_SAT_UW(in2, sat_val); \
+    MSA_SAT_UW(in3, sat_val); \
+}
+
+/* Description : Saturate signed word elements to the largest signed value
+ *               representable in (sat_val+1) bits.
+ *               The element data width remains unchanged.
+ * Arguments   : Inputs  - in, sat_val
+ *               Outputs - in (in place)
+ *               Return Type - v4i32
+ * Details     : Every signed word element of 'in' is saturated to the
+ *               (sat_val+1)-bit range and the result is written back in
+ *               place to the same vector.
+ *               The 2/4 variants saturate two/four vectors with the same
+ *               'sat_val'.
+ */
+#define MSA_SAT_SW(in, sat_val) \
+{ \
+    in = __msa_sat_s_w(in, sat_val); \
+}
+
+#define MSA_SAT_SW2(in0, in1, sat_val) \
+{ \
+    MSA_SAT_SW(in0, sat_val); \
+    MSA_SAT_SW(in1, sat_val); \
+}
+
+#define MSA_SAT_SW4(in0, in1, in2, in3, sat_val) \
+{ \
+    MSA_SAT_SW(in0, sat_val); \
+    MSA_SAT_SW(in1, sat_val); \
+    MSA_SAT_SW(in2, sat_val); \
+    MSA_SAT_SW(in3, sat_val); \
+}
+
+/* Description : Each byte element is logically xor'ed with immediate 128.
+ * Arguments   : Inputs  - in
+ *               Outputs - in (in-place)
+ *               Return Type - as per RTYPE
+ * Details     : Each unsigned byte element from input vector 'in' is
+ *               logically xor'ed with 128 and result is in-place stored in
+ *               'in' vector.
+ *               The 2/4 variants process two/four vectors in place.
+ * Note        : 'in' is both read and assigned, so it must be an lvalue.
+ *               The cast operand is parenthesized for macro hygiene.
+ */
+#define MSA_XORI_B_128(RTYPE, in) \
+{ \
+    in = (RTYPE) __msa_xori_b((v16u8) (in), 128); \
+}
+
+#define MSA_XORI_B2_128(RTYPE, in0, in1) \
+{ \
+    MSA_XORI_B_128(RTYPE, in0); \
+    MSA_XORI_B_128(RTYPE, in1); \
+}
+
+#define MSA_XORI_B4_128(RTYPE, in0, in1, in2, in3) \
+{ \
+    MSA_XORI_B2_128(RTYPE, in0, in1); \
+    MSA_XORI_B2_128(RTYPE, in2, in3); \
+}
+
+/* Description : Shift right logical all byte elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each element of vector 'in' is shifted right logical by
+ *               number of bits respective element holds in vector 'shift'
+ *               and result is in place written to 'in'.
+ *               Here, 'shift' is a vector passed in.
+ *               The 2/4 variants shift two/four vectors by the same 'shift'.
+ * Note        : Cast operands are parenthesized so that an expression
+ *               passed as 'shift' is converted as a whole.
+ */
+#define MSA_SRL_B(RTYPE, in, shift) \
+{ \
+    in = (RTYPE) __msa_srl_b((v16i8) (in), (v16i8) (shift)); \
+}
+
+#define MSA_SRL_B2(RTYPE, in0, in1, shift) \
+{ \
+    MSA_SRL_B(RTYPE, in0, shift); \
+    MSA_SRL_B(RTYPE, in1, shift); \
+}
+
+#define MSA_SRL_B4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+    MSA_SRL_B2(RTYPE, in0, in1, shift); \
+    MSA_SRL_B2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right logical all halfword elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each element of vector 'in' is shifted right logical by
+ *               number of bits respective element holds in vector 'shift'
+ *               and result is in place written to 'in'.
+ *               Here, 'shift' is a vector passed in.
+ *               The 2/4 variants shift two/four vectors by the same 'shift'.
+ * Note        : Cast operands are parenthesized so that an expression
+ *               passed as 'shift' is converted as a whole.
+ */
+#define MSA_SRL_H(RTYPE, in, shift) \
+{ \
+    in = (RTYPE) __msa_srl_h((v8i16) (in), (v8i16) (shift)); \
+}
+
+#define MSA_SRL_H2(RTYPE, in0, in1, shift) \
+{ \
+    MSA_SRL_H(RTYPE, in0, shift); \
+    MSA_SRL_H(RTYPE, in1, shift); \
+}
+
+#define MSA_SRL_H4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+    MSA_SRL_H2(RTYPE, in0, in1, shift); \
+    MSA_SRL_H2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right logical all word elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each element of vector 'in' is shifted right logical by
+ *               number of bits respective element holds in vector 'shift'
+ *               and result is in place written to 'in'.
+ *               Here, 'shift' is a vector passed in.
+ *               The 2/4 variants shift two/four vectors by the same 'shift'.
+ * Note        : Cast operands are parenthesized so that an expression
+ *               passed as 'shift' is converted as a whole.
+ */
+#define MSA_SRL_W(RTYPE, in, shift) \
+{ \
+    in = (RTYPE) __msa_srl_w((v4i32) (in), (v4i32) (shift)); \
+}
+
+#define MSA_SRL_W2(RTYPE, in0, in1, shift) \
+{ \
+    MSA_SRL_W(RTYPE, in0, shift); \
+    MSA_SRL_W(RTYPE, in1, shift); \
+}
+
+#define MSA_SRL_W4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+    MSA_SRL_W2(RTYPE, in0, in1, shift); \
+    MSA_SRL_W2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right logical all double word elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each element of vector 'in' is shifted right logical by
+ *               number of bits respective element holds in vector 'shift'
+ *               and result is in place written to 'in'.
+ *               Here, 'shift' is a vector passed in.
+ *               The 2/4 variants shift two/four vectors by the same 'shift'.
+ * Note        : Cast operands are parenthesized so that an expression
+ *               passed as 'shift' is converted as a whole.
+ */
+#define MSA_SRL_D(RTYPE, in, shift) \
+{ \
+    in = (RTYPE) __msa_srl_d((v2i64) (in), (v2i64) (shift)); \
+}
+
+#define MSA_SRL_D2(RTYPE, in0, in1, shift) \
+{ \
+    MSA_SRL_D(RTYPE, in0, shift); \
+    MSA_SRL_D(RTYPE, in1, shift); \
+}
+
+#define MSA_SRL_D4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+    MSA_SRL_D2(RTYPE, in0, in1, shift); \
+    MSA_SRL_D2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right logical rounded all byte elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each element of vector 'in' is shifted right logical rounded
+ *               by number of bits respective element holds in vector 'shift'
+ *               and result is in place written to 'in'.
+ *               Here, 'shift' is a vector passed in.
+ *               The 2/4 variants shift two/four vectors by the same 'shift'.
+ * Note        : Cast operands are parenthesized so that an expression
+ *               passed as 'shift' is converted as a whole.
+ */
+#define MSA_SRLR_B(RTYPE, in, shift) \
+{ \
+    in = (RTYPE) __msa_srlr_b((v16i8) (in), (v16i8) (shift)); \
+}
+
+#define MSA_SRLR_B2(RTYPE, in0, in1, shift) \
+{ \
+    MSA_SRLR_B(RTYPE, in0, shift); \
+    MSA_SRLR_B(RTYPE, in1, shift); \
+}
+
+#define MSA_SRLR_B4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+    MSA_SRLR_B2(RTYPE, in0, in1, shift); \
+    MSA_SRLR_B2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right logical rounded all halfword elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each element of vector 'in' is shifted right logical rounded
+ *               by number of bits respective element holds in vector 'shift'
+ *               and result is in place written to 'in'.
+ *               Here, 'shift' is a vector passed in.
+ *               The 2/4 variants shift two/four vectors by the same 'shift'.
+ * Note        : Cast operands are parenthesized so that an expression
+ *               passed as 'shift' is converted as a whole.
+ */
+#define MSA_SRLR_H(RTYPE, in, shift) \
+{ \
+    in = (RTYPE) __msa_srlr_h((v8i16) (in), (v8i16) (shift)); \
+}
+
+#define MSA_SRLR_H2(RTYPE, in0, in1, shift) \
+{ \
+    MSA_SRLR_H(RTYPE, in0, shift); \
+    MSA_SRLR_H(RTYPE, in1, shift); \
+}
+
+#define MSA_SRLR_H4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+    MSA_SRLR_H2(RTYPE, in0, in1, shift); \
+    MSA_SRLR_H2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right logical rounded all word elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each element of vector 'in' is shifted right logical rounded
+ *               by number of bits respective element holds in vector 'shift'
+ *               and result is in place written to 'in'.
+ *               Here, 'shift' is a vector passed in.
+ *               The 2/4 variants shift two/four vectors by the same 'shift'.
+ * Note        : Cast operands are parenthesized so that an expression
+ *               passed as 'shift' is converted as a whole.
+ */
+#define MSA_SRLR_W(RTYPE, in, shift) \
+{ \
+    in = (RTYPE) __msa_srlr_w((v4i32) (in), (v4i32) (shift)); \
+}
+
+#define MSA_SRLR_W2(RTYPE, in0, in1, shift) \
+{ \
+    MSA_SRLR_W(RTYPE, in0, shift); \
+    MSA_SRLR_W(RTYPE, in1, shift); \
+}
+
+#define MSA_SRLR_W4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+    MSA_SRLR_W2(RTYPE, in0, in1, shift); \
+    MSA_SRLR_W2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right logical rounded all double word elements of vector.
+ * Arguments : Inputs - in, shift
+ * Outputs - in (in place)
+ * Return Type - as per RTYPE
+ * Details : Each element of vector 'in' is shifted right logical rounded
+ * by number of bits respective element holds in vector 'shift'
+ * and result is in place written to 'in'.
+ * Here, 'shift' is a vector passed in.
+ */
+#define MSA_SRLR_D(RTYPE, in, shift) \
+{ \
+ in = (RTYPE) __msa_srlr_d((v2i64) in, (v2i64) shift); \
+}
+
+#define MSA_SRLR_D2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRLR_D(RTYPE, in0, shift); \
+ MSA_SRLR_D(RTYPE, in1, shift); \
+}
+
+#define MSA_SRLR_D4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRLR_D2(RTYPE, in0, in1, shift); \
+ MSA_SRLR_D2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all byte elements of vector.
+ * Arguments : Inputs - in, shift
+ * Outputs - in (in place)
+ * Return Type - as per RTYPE
+ * Details : Each element of vector 'in' is shifted right arithmetic
+ * rounded by number of bits respective element holds in
+ * vector 'shift' and result is in place written to 'in'.
+ * Here, 'shift' is a vector passed in.
+ */
+#define MSA_SRAR_B(RTYPE, in, shift) \
+{ \
+ in = (RTYPE) __msa_srar_b((v16i8) in, (v16i8) shift); \
+}
+
+#define MSA_SRAR_B2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRAR_B(RTYPE, in0, shift); \
+ MSA_SRAR_B(RTYPE, in1, shift); \
+}
+
+#define MSA_SRAR_B4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRAR_B2(RTYPE, in0, in1, shift); \
+ MSA_SRAR_B2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all halfword elements of vector.
+ * Arguments : Inputs - in, shift
+ * Outputs - in (in place)
+ * Return Type - as per RTYPE
+ * Details : Each element of vector 'in' is shifted right arithmetic
+ * rounded by number of bits respective element holds in
+ * vector 'shift' and result is in place written to 'in'.
+ * Here, 'shift' is a vector passed in.
+ */
+#define MSA_SRAR_H(RTYPE, in, shift) \
+{ \
+ in = (RTYPE) __msa_srar_h((v8i16) in, (v8i16) shift); \
+}
+
+#define MSA_SRAR_H2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRAR_H(RTYPE, in0, shift); \
+ MSA_SRAR_H(RTYPE, in1, shift); \
+}
+
+#define MSA_SRAR_H4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRAR_H2(RTYPE, in0, in1, shift); \
+ MSA_SRAR_H2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all word elements of vector.
+ * Arguments : Inputs - in, shift
+ * Outputs - in (in place)
+ * Return Type - as per RTYPE
+ * Details : Each element of vector 'in' is shifted right arithmetic
+ * rounded by number of bits respective element holds in
+ * vector 'shift' and result is in place written to 'in'.
+ * Here, 'shift' is a vector passed in.
+ */
+#define MSA_SRAR_W(RTYPE, in, shift) \
+{ \
+ in = (RTYPE) __msa_srar_w((v4i32) in, (v4i32) shift); \
+}
+
+#define MSA_SRAR_W2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRAR_W(RTYPE, in0, shift); \
+ MSA_SRAR_W(RTYPE, in1, shift); \
+}
+
+#define MSA_SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRAR_W2(RTYPE, in0, in1, shift); \
+ MSA_SRAR_W2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all double word elements
+ * of vector.
+ * Arguments : Inputs - in, shift
+ * Outputs - in (in place)
+ * Return Type - as per RTYPE
+ * Details : Each element of vector 'in' is shifted right arithmetic
+ * rounded by number of bits respective element holds in
+ * vector 'shift' and result is in place written to 'in'.
+ * Here, 'shift' is a vector passed in.
+ */
+#define MSA_SRAR_D(RTYPE, in, shift) \
+{ \
+ in = (RTYPE) __msa_srar_d((v2i64) in, (v2i64) shift); \
+}
+
+#define MSA_SRAR_D2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRAR_D(RTYPE, in0, shift); \
+ MSA_SRAR_D(RTYPE, in1, shift); \
+}
+
+#define MSA_SRAR_D4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRAR_D2(RTYPE, in0, in1, shift); \
+ MSA_SRAR_D2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all byte elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each byte element of vector 'in' is shifted right
+ *               arithmetic, with rounding, by 'shift' bits and the result
+ *               is written back to 'in' in place.
+ *               Here, 'shift' is an immediate (constant 0..7), not a vector.
+ */
+#define MSA_SRARI_B(RTYPE, in, shift)                      \
+{                                                          \
+    in = (RTYPE) __msa_srari_b((v16i8) in, (shift));       \
+}
+
+#define MSA_SRARI_B2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRARI_B(RTYPE, in0, shift); \
+ MSA_SRARI_B(RTYPE, in1, shift); \
+}
+
+#define MSA_SRARI_B4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRARI_B2(RTYPE, in0, in1, shift); \
+ MSA_SRARI_B2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all halfword elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each halfword element of vector 'in' is shifted right
+ *               arithmetic, with rounding, by 'shift' bits and the result
+ *               is written back to 'in' in place.
+ *               Here, 'shift' is an immediate (constant 0..15), not a vector.
+ */
+#define MSA_SRARI_H(RTYPE, in, shift)                      \
+{                                                          \
+    in = (RTYPE) __msa_srari_h((v8i16) in, (shift));       \
+}
+
+#define MSA_SRARI_H2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRARI_H(RTYPE, in0, shift); \
+ MSA_SRARI_H(RTYPE, in1, shift); \
+}
+
+#define MSA_SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRARI_H2(RTYPE, in0, in1, shift); \
+ MSA_SRARI_H2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all word elements of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each word element of vector 'in' is shifted right
+ *               arithmetic, with rounding, by 'shift' bits and the result
+ *               is written back to 'in' in place.
+ *               Here, 'shift' is an immediate (constant 0..31), not a vector.
+ */
+#define MSA_SRARI_W(RTYPE, in, shift)                      \
+{                                                          \
+    in = (RTYPE) __msa_srari_w((v4i32) in, (shift));       \
+}
+
+#define MSA_SRARI_W2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRARI_W(RTYPE, in0, shift); \
+ MSA_SRARI_W(RTYPE, in1, shift); \
+}
+
+#define MSA_SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRARI_W2(RTYPE, in0, in1, shift); \
+ MSA_SRARI_W2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Shift right arithmetic rounded all double word elements
+ *               of vector.
+ * Arguments   : Inputs  - in, shift
+ *               Outputs - in (in place)
+ *               Return Type - as per RTYPE
+ * Details     : Each double word element of vector 'in' is shifted right
+ *               arithmetic, with rounding, by 'shift' bits and the result
+ *               is written back to 'in' in place.
+ *               Here, 'shift' is an immediate (constant 0..63), not a vector.
+ */
+#define MSA_SRARI_D(RTYPE, in, shift)                      \
+{                                                          \
+    in = (RTYPE) __msa_srari_d((v2i64) in, (shift));       \
+}
+
+#define MSA_SRARI_D2(RTYPE, in0, in1, shift) \
+{ \
+ MSA_SRARI_D(RTYPE, in0, shift); \
+ MSA_SRARI_D(RTYPE, in1, shift); \
+}
+
+#define MSA_SRARI_D4(RTYPE, in0, in1, in2, in3, shift) \
+{ \
+ MSA_SRARI_D2(RTYPE, in0, in1, shift); \
+ MSA_SRARI_D2(RTYPE, in2, in3, shift); \
+}
+
+/* Description : Transposes input 4x4 byte block.
+ * Arguments : Inputs - in0, in1, in2, in3 (input 4x4 byte block)
+ * Outputs - out0, out1, out2, out3 (output 4x4 byte block)
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE4x4_B(RTYPE, in0, in1, in2, in3, \
+ out0, out1, out2, out3) \
+{ \
+ v16i8 zero_m = { 0 }; \
+ \
+ MSA_ILVR_B2(RTYPE, in2, in0, in3, in1, out2, out3); \
+ out0 = (RTYPE) __msa_ilvr_b((v16i8) out3, (v16i8) out2); \
+ out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 4); \
+ out2 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out1, 4); \
+ out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 4); \
+}
+
+/* Description : Transposes input 8x4 byte block into 4x8.
+ * Arguments : Inputs - in0, in1, in2 ~ in7 (input 8x4 byte block)
+ * Outputs - out0, out1, out2, out3 (output 4x8 byte block)
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE8x4_B(RTYPE, in0, in1, in2, in3, in4, in5, \
+ in6, in7, out0, out1, out2, out3) \
+{ \
+ v16i8 zero_m = { 0 }; \
+ \
+ MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \
+ out0, out1, out2, out3); \
+ MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \
+ out0 = (RTYPE) __msa_ilvr_b((v16i8) out3, (v16i8) out2); \
+ out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 8); \
+ out2 = (RTYPE) __msa_ilvl_b((v16i8) out3, (v16i8) out2); \
+ out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 8); \
+}
+
+/* Description : Transposes 16x4 block into 4x16 with byte elements in vectors.
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
+ * in8, in9, in10, in11, in12, in13, in14, in15
+ * Outputs - out0, out1, out2, out3
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE16x4_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ in8, in9, in10, in11, in12, in13, in14, in15, \
+ out0, out1, out2, out3) \
+{ \
+ v2i64 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ \
+ MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \
+ out0, out1, out2, out3); \
+ MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \
+ MSA_ILVRL_B2(v2i64, out3, out2, tmp0_m, tmp1_m); \
+ \
+ MSA_ILVR_B4(RTYPE, in10, in8, in11, in9, in14, in12, in15, in13, \
+ out0, out1, out2, out3); \
+ MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \
+ MSA_ILVRL_B2(v2i64, out3, out2, tmp2_m, tmp3_m); \
+ \
+ MSA_ILVRL_D4(RTYPE, tmp2_m, tmp0_m, tmp3_m, tmp1_m, \
+ out0, out1, out2, out3); \
+}
+
+/* Description : Transposes input 8x8 byte block.
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ * (input 8x8 byte block)
+ * Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ * (output 8x8 byte block)
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE8x8_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) \
+{ \
+ v16i8 zero_m = {0}; \
+ \
+ MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \
+ out0, out1, out2, out3); \
+ MSA_ILVRL_B4(RTYPE, out1, out0, out3, out2, out4, out5, out6, out7); \
+ MSA_ILVRL_W4(RTYPE, out6, out4, out7, out5, out0, out2, out4, out6); \
+ out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 8); \
+ out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 8); \
+ out5 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out4, 8); \
+ out7 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out6, 8); \
+}
+
+/* Description : Transposes 16x8 block into 8x16 with byte elements in vectors.
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
+ * in8, in9, in10, in11, in12, in13, in14, in15
+ * Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE16x8_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ in8, in9, in10, in11, in12, in13, in14, in15, \
+ out0, out1, out2, out3, out4, out5, out6, out7) \
+{ \
+ v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ \
+ MSA_ILVEV_D4(RTYPE, in8, in0, in9, in1, in10, in2, in11, in3, \
+ out7, out6, out5, out4); \
+ MSA_ILVEV_D4(RTYPE, in12, in4, in13, in5, in14, in6, in15, in7, \
+ out3, out2, out1, out0); \
+ \
+ tmp0_m = __msa_ilvev_b((v16i8) out6, (v16i8) out7); \
+ tmp1_m = __msa_ilvod_b((v16i8) out6, (v16i8) out7); \
+ out6 = (RTYPE) __msa_ilvev_b((v16i8) out4, (v16i8) out5); \
+ out5 = (RTYPE) __msa_ilvod_b((v16i8) out4, (v16i8) out5); \
+ tmp2_m = __msa_ilvev_b((v16i8) out2, (v16i8) out3); \
+ tmp3_m = __msa_ilvod_b((v16i8) out2, (v16i8) out3); \
+ out2 = (RTYPE) __msa_ilvev_b((v16i8) out0, (v16i8) out1); \
+ out1 = (RTYPE) __msa_ilvod_b((v16i8) out0, (v16i8) out1); \
+ \
+ MSA_ILVEV_H2(RTYPE, out6, tmp0_m, out2, tmp2_m, out3, out7); \
+ out0 = (RTYPE) __msa_ilvev_w((v4i32) out7, (v4i32) out3); \
+ out4 = (RTYPE) __msa_ilvod_w((v4i32) out7, (v4i32) out3); \
+ \
+ MSA_ILVOD_H2(RTYPE, out6, tmp0_m, out2, tmp2_m, out3, out7); \
+ out2 = (RTYPE) __msa_ilvev_w((v4i32) out7, (v4i32) out3); \
+ out6 = (RTYPE) __msa_ilvod_w((v4i32) out7, (v4i32) out3); \
+ \
+ MSA_ILVOD_H2(v16i8, out5, tmp1_m, out1, tmp3_m, tmp0_m, tmp2_m); \
+ out3 = (RTYPE) __msa_ilvev_w((v4i32) tmp2_m, (v4i32) tmp0_m); \
+ out7 = (RTYPE) __msa_ilvod_w((v4i32) tmp2_m, (v4i32) tmp0_m); \
+ \
+ MSA_ILVEV_H2(v16i8, out5, tmp1_m, out1, tmp3_m, tmp0_m, tmp2_m); \
+ out1 = (RTYPE) __msa_ilvev_w((v4i32) tmp2_m, (v4i32) tmp0_m); \
+ out5 = (RTYPE) __msa_ilvod_w((v4i32) tmp2_m, (v4i32) tmp0_m); \
+}
+
+/* Description : Transposes 4x4 block with half word elements in vectors.
+ * Arguments : Inputs - in0, in1, in2, in3
+ * Outputs - out0, out1, out2, out3
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE4x4_H(RTYPE, in0, in1, in2, in3, \
+ out0, out1, out2, out3) \
+{ \
+ MSA_ILVR_H2(RTYPE, in1, in0, in3, in2, out1, out3); \
+ MSA_ILVRL_W2(RTYPE, out3, out1, out0, out2); \
+ MSA_ILVL_D2(RTYPE, out0, out0, out2, out2, out1, out3); \
+}
+
+/* Description : Transposes 8x4 block with half word elements in vectors.
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ * Outputs - out0, out1, out2, out3
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE8x4_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ v8i16 s0_m, s1_m; \
+ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ \
+ MSA_ILVR_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \
+ MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp0_m, tmp1_m); \
+ MSA_ILVR_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \
+ MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp2_m, tmp3_m); \
+ MSA_PCKEV_D2(RTYPE, tmp0_m, tmp2_m, tmp1_m, tmp3_m, out0, out2); \
+ MSA_PCKOD_D2(RTYPE, tmp0_m, tmp2_m, tmp1_m, tmp3_m, out1, out3); \
+}
+
+/* Description : Transposes 8x8 block with half word elements in vectors.
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ * Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ * Return Type - RTYPE
+ * Details :
+ */
+#define MSA_TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) \
+{ \
+ v8i16 s0_m, s1_m; \
+ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
+ \
+ MSA_ILVR_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \
+ MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp0_m, tmp1_m); \
+ MSA_ILVL_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \
+ MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp2_m, tmp3_m); \
+ MSA_ILVR_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \
+ MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp4_m, tmp5_m); \
+ MSA_ILVL_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \
+ MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp6_m, tmp7_m); \
+ MSA_PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \
+ tmp3_m, tmp7_m, out0, out2, out4, out6); \
+ MSA_PCKOD_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \
+ tmp3_m, tmp7_m, out1, out3, out5, out7); \
+}
+
+#endif /* _MSA_MACROS_H */
diff --git a/chromium/third_party/openh264/src/codec/common/meson.build b/chromium/third_party/openh264/src/codec/common/meson.build
index d7d15a61ee9..7f8acb685be 100644
--- a/chromium/third_party/openh264/src/codec/common/meson.build
+++ b/chromium/third_party/openh264/src/codec/common/meson.build
@@ -17,21 +17,41 @@ cpp_sources = [
'src/WelsThreadPool.cpp',
]
-asm_sources = [
- 'x86/cpuid.asm',
- 'x86/dct.asm',
- 'x86/deblock.asm',
- 'x86/expand_picture.asm',
- 'x86/intra_pred_com.asm',
- 'x86/mb_copy.asm',
- 'x86/mc_chroma.asm',
- 'x86/mc_luma.asm',
- 'x86/satd_sad.asm',
- 'x86/vaa.asm',
-]
-
-objs_asm = asm_gen.process(asm_sources)
+objs_asm = []
+if ['x86', 'x86_64'].contains(cpu_family)
+ asm_sources = [
+ 'x86/cpuid.asm',
+ 'x86/dct.asm',
+ 'x86/deblock.asm',
+ 'x86/expand_picture.asm',
+ 'x86/intra_pred_com.asm',
+ 'x86/mb_copy.asm',
+ 'x86/mc_chroma.asm',
+ 'x86/mc_luma.asm',
+ 'x86/satd_sad.asm',
+ 'x86/vaa.asm',
+ ]
+ objs_asm += asm_gen.process(asm_sources)
+elif cpu_family == 'arm'
+ cpp_sources += [
+ 'arm/copy_mb_neon.S',
+ 'arm/deblocking_neon.S',
+ 'arm/expand_picture_neon.S',
+ 'arm/intra_pred_common_neon.S',
+ 'arm/mc_neon.S',
+ ]
+elif cpu_family == 'aarch64'
+ cpp_sources += [
+ 'arm64/copy_mb_aarch64_neon.S',
+ 'arm64/deblocking_aarch64_neon.S',
+ 'arm64/expand_picture_aarch64_neon.S',
+ 'arm64/intra_pred_common_aarch64_neon.S',
+ 'arm64/mc_aarch64_neon.S',
+ ]
+else
+ error('Unsupported cpu_family @0@'.format(cpu_family))
+endif
libcommon = static_library('common', cpp_sources, objs_asm,
- include_directories: inc,
+ include_directories: [inc, casm_inc],
dependencies: deps)
diff --git a/chromium/third_party/openh264/src/codec/common/mips/copy_mb_msa.c b/chromium/third_party/openh264/src/codec/common/mips/copy_mb_msa.c
new file mode 100644
index 00000000000..4ba01edc3bd
--- /dev/null
+++ b/chromium/third_party/openh264/src/codec/common/mips/copy_mb_msa.c
@@ -0,0 +1,80 @@
+/*!
+ * \copy
+ * Copyright (C) 2020 Loongson Technology Co. Ltd.
+ * Contributed by Gu Xiwei(guxiwei-hf@loongson.cn)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file copy_mb_msa.c
+ *
+ * \brief MIPS MSA optimizations
+ *
+ * \date 14/05/2020 Created
+ *
+ *************************************************************************************
+ */
+
+#include <stdint.h>
+#include "msa_macros.h"
+
+/* Copy an 8x8 byte block. NOTE(review): MSA_LD_V2 is given v16u8, so each
+ * source row is presumably read as 16 bytes though only 8 are stored - confirm. */
+void WelsCopy8x8_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
+                     int32_t iStrideS) {
+    v16u8 rowA, rowB;
+    for (int pass = 0; pass < 4; ++pass, pSrc += 2 * iStrideS, pDst += 2 * iStrideD) {
+        MSA_LD_V2(v16u8, pSrc, iStrideS, rowA, rowB);
+        MSA_ST_D(rowA, 0, pDst);
+        MSA_ST_D(rowB, 0, pDst + iStrideD);
+    }
+}
+
+/* Copy an 8-wide, 16-row byte block as two stacked 8x8 copies (rows 0-7, 8-15). */
+void WelsCopy8x16_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
+                      int32_t iStrideS) {
+    WelsCopy8x8_msa(pDst, iStrideD, pSrc, iStrideS);
+    WelsCopy8x8_msa(&pDst[8 * iStrideD], iStrideD, &pSrc[8 * iStrideS], iStrideS);
+}
+
+/* Copy a 16-wide, 8-row byte block from pSrc to pDst. Each pass moves two
+ * rows as v16u8 vectors with one MSA_LD_V2 / MSA_ST_V2 pair. */
+void WelsCopy16x8_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
+                      int32_t iStrideS) {
+    v16u8 rowA, rowB;
+    for (int pass = 0; pass < 4; ++pass, pSrc += 2 * iStrideS, pDst += 2 * iStrideD) {
+        MSA_LD_V2(v16u8, pSrc, iStrideS, rowA, rowB);
+        MSA_ST_V2(v16u8, rowA, rowB, pDst, iStrideD);
+    }
+}
+
+/* Copy a 16x16 byte block as two stacked 16x8 halves (rows 0-7, then 8-15). */
+void WelsCopy16x16_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
+                       int32_t iStrideS) {
+    WelsCopy16x8_msa(pDst, iStrideD, pSrc, iStrideS);
+    WelsCopy16x8_msa(pDst + 8 * iStrideD, iStrideD, pSrc + 8 * iStrideS, iStrideS);
+}
diff --git a/chromium/third_party/openh264/src/codec/common/mips/deblock_msa.c b/chromium/third_party/openh264/src/codec/common/mips/deblock_msa.c
new file mode 100644
index 00000000000..0d3dfcb798e
--- /dev/null
+++ b/chromium/third_party/openh264/src/codec/common/mips/deblock_msa.c
@@ -0,0 +1,1024 @@
+/*!
+ * \copy
+ * Copyright (C) 2019 Loongson Technology Co. Ltd.
+ * Contributed by Gu Xiwei(guxiwei-hf@loongson.cn)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file deblock_msa.c
+ *
+ * \brief MIPS MSA optimizations
+ *
+ * \date 15/05/2020 Created
+ *
+ *************************************************************************************
+ */
+
+#include <stdint.h>
+#include "msa_macros.h"
+
+void DeblockLumaLt4V_msa(uint8_t *pPix, int32_t iStride, int32_t iAlpha,
+ int32_t iBeta, int8_t *pTc) { // tc-clipped luma filter over rows pPix-3*iStride .. pPix+2*iStride; rewrites p1, p0, q0, q1
+ v16u8 p0, p1, p2, q0, q1, q2;
+ v16i8 iTc, negiTc, negTc, flags, f;
+ v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r, q1_l, q1_r, q2_l, q2_r;
+ v8i16 tc_l, tc_r, negTc_l, negTc_r;
+ v8i16 iTc_l, iTc_r, negiTc_l, negiTc_r;
+ // Temporaries, reused by every stage below
+ v8i16 t0, t1, t2, t3;
+ v16u8 alpha, beta;
+ v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0;
+ v16i8 const_1_b = __msa_ldi_b(1);
+ v8i16 const_1_h = __msa_ldi_h(1);
+ v8i16 const_4_h = __msa_ldi_h(4);
+ v8i16 const_not_255_h = __msa_ldi_h(~255); // 0xFF00 per halfword: non-zero AND means value is outside 0..255
+ v16i8 zero = { 0 };
+ v16i8 tc = { pTc[0 >> 2], pTc[1 >> 2], pTc[2 >> 2], pTc[3 >> 2],
+ pTc[4 >> 2], pTc[5 >> 2], pTc[6 >> 2], pTc[7 >> 2],
+ pTc[8 >> 2], pTc[9 >> 2], pTc[10 >> 2], pTc[11 >> 2],
+ pTc[12 >> 2], pTc[13 >> 2], pTc[14 >> 2], pTc[15 >> 2] }; // pTc[0..3], each replicated over 4 consecutive lanes
+ negTc = zero - tc; // lower clip bound for the p1/q1 corrections
+ iTc = tc; // base of the p0/q0 clip bound; incremented per lane below
+
+ // Load the six rows around pPix: p2, p1, p0 before it, q0 at it, q1, q2 after it
+ MSA_LD_V4(v16u8, pPix - 3 * iStride, iStride, p2, p1, p0, q0);
+ MSA_LD_V2(v16u8, pPix + iStride, iStride, q1, q2);
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0); // absolute differences first ...
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP2P0 = __msa_asub_u_b(p2, p0);
+ bDetaQ2Q0 = __msa_asub_u_b(q2, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); // ... then turned into per-lane masks (diff < threshold)
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+ bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta);
+ bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta);
+
+ // Zero-extend p0, p1, p2, q0, q1, q2 from 8 bits to 16 bits (_r / _l halves)
+ MSA_ILVRL_B4(v8i16, zero, p0, zero, p1,
+ p0_r, p0_l, p1_r, p1_l);
+ MSA_ILVRL_B4(v8i16, zero, p2, zero, q0,
+ p2_r, p2_l, q0_r, q0_l);
+ MSA_ILVRL_B4(v8i16, zero, q1, zero, q2,
+ q1_r, q1_l, q2_r, q2_l);
+ // Sign-extend tc, negTc from 8 bits to 16 bits (the "< 0" mask supplies the high byte)
+ flags = __msa_clt_s_b(tc, zero);
+ MSA_ILVRL_B2(v8i16, flags, tc, tc_r, tc_l);
+ flags = __msa_clt_s_b(negTc, zero);
+ MSA_ILVRL_B2(v8i16, flags, negTc, negTc_r, negTc_l);
+
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; // lanes passing all three edge tests
+ flags = f & (v16i8)bDetaP2P0;
+ flags = __msa_ceq_b(flags, zero);
+ iTc += ((~flags) & const_1_b); // +1 where f and |p2 - p0| < beta
+ flags = f & (v16i8)bDetaQ2Q0;
+ flags = __msa_ceq_b(flags, zero);
+ iTc += ((~flags) & const_1_b); // +1 where f and |q2 - q0| < beta
+ negiTc = zero - iTc;
+ // Sign-extend iTc, negiTc from 8 bits to 16 bits
+ flags = __msa_clt_s_b(iTc, zero);
+ MSA_ILVRL_B2(v8i16, flags, iTc, iTc_r, iTc_l);
+ flags = __msa_clt_s_b(negiTc, zero);
+ MSA_ILVRL_B2(v8i16, flags, negiTc, negiTc_r, negiTc_l);
+
+ // Calculate the left part
+ // p1: correction clipped to [-tc, tc]
+ t0 = (p2_l + ((p0_l + q0_l + const_1_h) >> 1) - (p1_l << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_l, t0);
+ t0 = __msa_min_s_h(tc_l, t0);
+ t1 = p1_l + t0;
+ // q1: correction clipped to [-tc, tc]
+ t0 = (q2_l + ((p0_l + q0_l + const_1_h) >> 1) - (q1_l << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_l, t0);
+ t0 = __msa_min_s_h(tc_l, t0);
+ t2 = q1_l + t0;
+ // iDeta: p0/q0 delta clipped to [-iTc, iTc]; still reads the unfiltered p1_l / q1_l
+ t0 = (((q0_l - p0_l) << 2) + (p1_l - q1_l) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negiTc_l, t0);
+ t0 = __msa_min_s_h(iTc_l, t0);
+ p1_l = t1; // filtered p1/q1 are committed only after iDeta has been computed
+ q1_l = t2;
+ // p0 = clamp(p0 + iDeta) to 0..255
+ t1 = p0_l + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1); // all-ones when t1 >= 0 (low byte 255), zero when t1 < 0
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); // all-ones when t1 is already within 0..255
+ p0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0 = clamp(q0 - iDeta) to 0..255
+ t1 = q0_l - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ // Calculate the right part (same steps as the left part, on the _r halves)
+ // p1
+ t0 = (p2_r + ((p0_r + q0_r + const_1_h) >> 1) - (p1_r << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_r, t0);
+ t0 = __msa_min_s_h(tc_r, t0);
+ t1 = p1_r + t0;
+ // q1
+ t0 = (q2_r + ((p0_r + q0_r + const_1_h) >> 1) - (q1_r << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_r, t0);
+ t0 = __msa_min_s_h(tc_r, t0);
+ t2 = q1_r + t0;
+ // iDeta
+ t0 = (((q0_r - p0_r) << 2) + (p1_r - q1_r) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negiTc_r, t0);
+ t0 = __msa_min_s_h(iTc_r, t0);
+ p1_r = t1;
+ q1_r = t2;
+ // p0
+ t1 = p0_r + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ p0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0
+ t1 = q0_r - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ // Combine left and right halves: t0 = p1, t1 = p0, t2 = q0, t3 = q1
+ MSA_PCKEV_B4(v8i16, p1_l, p1_r, p0_l, p0_r, q0_l, q0_r, q1_l, q1_r,
+ t0, t1, t2, t3);
+ flags = (v16i8)__msa_cle_s_b(zero, tc); // only lanes with tc >= 0 ...
+ flags &= f; // ... that also passed the edge tests are filtered
+ p0 = (v16u8)(((v16i8)t1 & flags) + (p0 & (~flags)));
+ q0 = (v16u8)(((v16i8)t2 & flags) + (q0 & (~flags)));
+ // Reuse t1, t2 as masks: p1 / q1 change only where the p2 / q2 test also passed
+ t1 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaP2P0, zero))));
+ p1 = (v16u8)(t0 & t1) + (p1 & (v16u8)(~t1));
+ t2 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaQ2Q0, zero))));
+ q1 = (v16u8)(t3 & t2) + (q1 & (v16u8)(~t2));
+
+ // Store p1, p0, q0, q1 back to pPix (p2 and q2 are never written)
+ MSA_ST_V4(v16u8, p1, p0, q0, q1, pPix - 2 * iStride, iStride);
+}
+
+void DeblockLumaEq4V_msa(uint8_t *pPix, int32_t iStride, int32_t iAlpha,
+ int32_t iBeta) { // unclipped luma filter over rows pPix-4*iStride .. pPix+3*iStride; rewrites p2..q2
+ v16u8 p0, p1, p2, p3, q0, q1, q2, q3;
+ v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, p3_l, p3_r,
+ q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r;
+ v8i16 t0, t1, t2, t0_con1; // filtered p0, p1, p2 and the alternative ("condition 1") p0
+ v8i16 s0, s1, s2, s0_con1; // filtered q0, q1, q2 and the alternative ("condition 1") q0
+ v16u8 alpha, beta;
+ v16u8 iDetaP0Q0, bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0;
+ // Per-lane condition masks
+ v16u8 mask0, mask1;
+ v16i8 const_2_b = __msa_ldi_b(2);
+ v8i16 const_2_h = __msa_ldi_h(2);
+ v8i16 const_4_h = __msa_ldi_h(4);
+ v16i8 zero = { 0 };
+
+ // Load the eight rows around pPix: p3..p0 before it, q0 at it, q1..q3 after it
+ MSA_LD_V8(v16u8, pPix - 4 * iStride, iStride, p3, p2, p1, p0,
+ q0, q1, q2, q3);
+ // iAlpha and iBeta are treated as uint8_t values: broadcast to every byte lane
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+
+ // iDetaP0Q0 keeps the raw |p0 - q0| (not a mask); it is compared with two thresholds below
+ iDetaP0Q0 = __msa_asub_u_b(p0, q0);
+
+ bDetaP1P0 = __msa_asub_u_b(p1, p0); // absolute differences first ...
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP2P0 = __msa_asub_u_b(p2, p0);
+ bDetaQ2Q0 = __msa_asub_u_b(q2, q0);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); // ... then turned into per-lane masks (diff < beta)
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+ bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta);
+ bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta);
+
+ // Zero-extend p0, p1, p2, p3, q0, q1, q2, q3 from 8 bits to 16 bits (_r / _l halves)
+ MSA_ILVRL_B4(v8i16, zero, p0, zero, p1,
+ p0_r, p0_l, p1_r, p1_l);
+ MSA_ILVRL_B4(v8i16, zero, p2, zero, p3,
+ p2_r, p2_l, p3_r, p3_l);
+ MSA_ILVRL_B4(v8i16, zero, q0, zero, q1,
+ q0_r, q0_l, q1_r, q1_l);
+ MSA_ILVRL_B4(v8i16, zero, q2, zero, q3,
+ q2_r, q2_l, q3_r, q3_l);
+
+ // Calculate condition masks
+ // mask0: (iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0
+ mask0 = (v16u8)__msa_clt_u_b(iDetaP0Q0, alpha);
+ mask0 &= bDetaP1P0;
+ mask0 &= bDetaQ1Q0;
+ // mask1: iDetaP0Q0 < ((iAlpha >> 2) + 2)
+ mask1 = (v16u8)((alpha >> 2) + const_2_b);
+ mask1 = (v16u8)__msa_clt_u_b(iDetaP0Q0, mask1);
+
+ // Calculate the left part (results go to temporaries so every formula sees unfiltered inputs)
+ // p0
+ t0 = (p2_l + (p1_l << 1) + (p0_l << 1) + (q0_l << 1) + q1_l + const_4_h) >> 3;
+ // p1
+ t1 = (p2_l + p1_l + p0_l + q0_l + const_2_h) >> 2;
+ // p2
+ t2 = ((p3_l << 1) + p2_l + (p2_l << 1) + p1_l + p0_l + q0_l + const_4_h) >> 3;
+ // p0 condition 1 (alternative p0 used when the stronger filter is not selected)
+ t0_con1 = ((p1_l << 1) + p0_l + q1_l + const_2_h) >> 2;
+ // q0
+ s0 = (p1_l + (p0_l << 1) + (q0_l << 1) + (q1_l << 1) + q2_l + const_4_h) >> 3;
+ // q1
+ s1 = (p0_l + q0_l + q1_l + q2_l + const_2_h) >> 2;
+ // q2
+ s2 = ((q3_l << 1) + q2_l + (q2_l << 1) + q1_l + q0_l + p0_l + const_4_h) >> 3;
+ // q0 condition 1 (alternative q0 used when the stronger filter is not selected)
+ s0_con1 = ((q1_l << 1) + q0_l + p1_l + const_2_h) >> 2;
+ // Move the results back into the pixel variables
+ p0_l = t0;
+ p1_l = t1;
+ p2_l = t2;
+ q0_l = s0;
+ q1_l = s1;
+ q2_l = s2;
+ // Reuse p3_l, q3_l to carry the condition-1 results into the pack below
+ p3_l = t0_con1;
+ q3_l = s0_con1;
+
+ // Calculate the right part (same steps as the left part, on the _r halves)
+ // p0
+ t0 = (p2_r + (p1_r << 1) + (p0_r << 1) + (q0_r << 1) + q1_r + const_4_h) >> 3;
+ // p1
+ t1 = (p2_r + p1_r + p0_r + q0_r + const_2_h) >> 2;
+ // p2
+ t2 = ((p3_r << 1) + p2_r + (p2_r << 1) + p1_r + p0_r + q0_r + const_4_h) >> 3;
+ // p0 condition 1
+ t0_con1 = ((p1_r << 1) + p0_r + q1_r + const_2_h) >> 2;
+ // q0
+ s0 = (p1_r + (p0_r << 1) + (q0_r << 1) + (q1_r << 1) + q2_r + const_4_h) >> 3;
+ // q1
+ s1 = (p0_r + q0_r + q1_r + q2_r + const_2_h) >> 2;
+ // q2
+ s2 = ((q3_r << 1) + q2_r + (q2_r << 1) + q1_r + q0_r + p0_r + const_4_h) >> 3;
+ // q0 condition 1
+ s0_con1 = ((q1_r << 1) + q0_r + p1_r + const_2_h) >> 2;
+ // Move the results back into the pixel variables
+ p0_r = t0;
+ p1_r = t1;
+ p2_r = t2;
+ q0_r = s0;
+ q1_r = s1;
+ q2_r = s2;
+ // Reuse p3_r, q3_r to carry the condition-1 results into the pack below
+ p3_r = t0_con1;
+ q3_r = s0_con1;
+
+ // Combine left and right halves back into byte vectors
+ MSA_PCKEV_B4(v8i16, p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r,
+ t0, t1, t2, s0);
+ MSA_PCKEV_B4(v8i16, q1_l, q1_r, q2_l, q2_r, p3_l, p3_r, q3_l, q3_r,
+ s1, s2, t0_con1, s0_con1);
+ t0 = (v8i16)(((v16u8)t0 & mask0 & mask1 & bDetaP2P0) + ((v16u8)t0_con1 &
+ mask0 & mask1 & (~bDetaP2P0)) + ((v16u8)t0_con1 & mask0 & (~mask1))); // the three masks are disjoint, so '+' merges like OR
+ t1 = (v8i16)((v16u8)t1 & mask0 & mask1 & bDetaP2P0);
+ t2 = (v8i16)((v16u8)t2 & mask0 & mask1 & bDetaP2P0);
+ s0 = (v8i16)(((v16u8)s0 & mask0 & mask1 & bDetaQ2Q0) + ((v16u8)s0_con1 &
+ mask0 & mask1 & (~bDetaQ2Q0)) + ((v16u8)s0_con1 & mask0 & (~mask1)));
+ s1 = (v8i16)((v16u8)s1 & mask0 & mask1 & bDetaQ2Q0);
+ s2 = (v8i16)((v16u8)s2 & mask0 & mask1 & bDetaQ2Q0);
+ p0 = (v16u8)t0 + (p0 & (~mask0)); // lanes outside the selecting mask keep the original pixel
+ p1 = (v16u8)t1 + (p1 & ~(mask0 & mask1 & bDetaP2P0));
+ p2 = (v16u8)t2 + (p2 & ~(mask0 & mask1 & bDetaP2P0));
+ q0 = (v16u8)s0 + (q0 & (~mask0));
+ q1 = (v16u8)s1 + (q1 & ~(mask0 & mask1 & bDetaQ2Q0));
+ q2 = (v16u8)s2 + (q2 & ~(mask0 & mask1 & bDetaQ2Q0));
+
+ // Store p2, p1, p0, q0, q1, q2 back to pPix (p3 and q3 are never written)
+ MSA_ST_V4(v16u8, p2, p1, p0, q0, pPix - 3 * iStride, iStride);
+ MSA_ST_V2(v16u8, q1, q2, pPix + iStride, iStride);
+}
+
+
+// MSA luma deblocking across a vertical edge, tc-clipped variant.
+// Reads 16 rows starting at pPix - 3, filters p1/p0/q0/q1 and writes 4 bytes
+// per row back at pPix - 2.
+//   pPix    - presumably the q0 pixel of the first row (the load starts 3 bytes
+//             to its left and the store 2 bytes to its left) -- TODO confirm
+//   iStride - distance between consecutive rows, passed to the load/store macros
+//   iAlpha  - threshold compared against |p0 - q0| (broadcast as a byte)
+//   iBeta   - threshold compared against |p1 - p0|, |q1 - q0|, |p2 - p0|, |q2 - q0|
+//   pTc     - 4 clipping values; pTc[k] is replicated into byte lanes 4k..4k+3
+void DeblockLumaLt4H_msa(uint8_t* pPix, int32_t iStride, int32_t iAlpha,
+ int32_t iBeta, int8_t* pTc) {
+ v16u8 p0, p1, p2, q0, q1, q2;
+ v16i8 iTc, negiTc, negTc, flags, f;
+ v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r, q1_l, q1_r, q2_l, q2_r;
+ v8i16 tc_l, tc_r, negTc_l, negTc_r;
+ v8i16 iTc_l, iTc_r, negiTc_l, negiTc_r;
+ // Use for temporary variable
+ v8i16 t0, t1, t2, t3;
+ v16u8 alpha, beta;
+ v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0;
+ v16i8 const_1_b = __msa_ldi_b(1);
+ v8i16 const_1_h = __msa_ldi_h(1);
+ v8i16 const_4_h = __msa_ldi_h(4);
+ v8i16 const_not_255_h = __msa_ldi_h(~255);
+ v16i8 zero = { 0 };
+ // Index n >> 2 maps lanes 0..15 onto pTc[0..3], four lanes per value
+ v16i8 tc = { pTc[0 >> 2], pTc[1 >> 2], pTc[2 >> 2], pTc[3 >> 2],
+ pTc[4 >> 2], pTc[5 >> 2], pTc[6 >> 2], pTc[7 >> 2],
+ pTc[8 >> 2], pTc[9 >> 2], pTc[10 >> 2], pTc[11 >> 2],
+ pTc[12 >> 2], pTc[13 >> 2], pTc[14 >> 2], pTc[15 >> 2] };
+ negTc = zero - tc;
+ iTc = tc;
+
+ // Load data from pPix
+ // (the *_l / *_r / t* variables are only scratch registers for the raw rows here)
+ MSA_LD_V8(v8i16, pPix - 3, iStride, t0, t1, t2, t3, q1_l, q1_r, q2_l, q2_r);
+ MSA_LD_V8(v8i16, pPix + 8 * iStride - 3, iStride, p0_l, p0_r, p1_l, p1_r,
+ p2_l, p2_r, q0_l, q0_r);
+ // Transpose 16x8 to 8x16, we just need p0, p1, p2, q0, q1, q2
+ // (alpha and beta receive the two unused outputs and are overwritten below)
+ MSA_TRANSPOSE16x8_B(v16u8, t0, t1, t2, t3, q1_l, q1_r, q2_l, q2_r,
+ p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r,
+ p2, p1, p0, q0, q1, q2, alpha, beta);
+
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+
+ // Absolute differences, then turned into per-lane masks (difference < threshold)
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP2P0 = __msa_asub_u_b(p2, p0);
+ bDetaQ2Q0 = __msa_asub_u_b(q2, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+ bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta);
+ bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta);
+
+ // Unsigned extend p0, p1, p2, q0, q1, q2 from 8 bits to 16 bits
+ MSA_ILVRL_B4(v8i16, zero, p0, zero, p1,
+ p0_r, p0_l, p1_r, p1_l);
+ MSA_ILVRL_B4(v8i16, zero, p2, zero, q0,
+ p2_r, p2_l, q0_r, q0_l);
+ MSA_ILVRL_B4(v8i16, zero, q1, zero, q2,
+ q1_r, q1_l, q2_r, q2_l);
+ // Signed extend tc, negTc from 8 bits to 16 bits
+ // (the "value < 0" mask supplies the sign byte that gets interleaved in)
+ flags = __msa_clt_s_b(tc, zero);
+ MSA_ILVRL_B2(v8i16, flags, tc, tc_r, tc_l);
+ flags = __msa_clt_s_b(negTc, zero);
+ MSA_ILVRL_B2(v8i16, flags, negTc, negTc_r, negTc_l);
+
+ // f: lanes where all three basic edge conditions hold
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+ // iTc starts at tc and gains 1 for each of the p2 / q2 conditions that holds within f
+ flags = f & (v16i8)bDetaP2P0;
+ flags = __msa_ceq_b(flags, zero);
+ iTc += ((~flags) & const_1_b);
+ flags = f & (v16i8)bDetaQ2Q0;
+ flags = __msa_ceq_b(flags, zero);
+ iTc += ((~flags) & const_1_b);
+ negiTc = zero - iTc;
+ // Signed extend iTc, negiTc from 8 bits to 16 bits
+ flags = __msa_clt_s_b(iTc, zero);
+ MSA_ILVRL_B2(v8i16, flags, iTc, iTc_r, iTc_l);
+ flags = __msa_clt_s_b(negiTc, zero);
+ MSA_ILVRL_B2(v8i16, flags, negiTc, negiTc_r, negiTc_l);
+
+ // Calculate the left part
+ // p1
+ // (correction clipped to [-tc, tc] before being added to p1)
+ t0 = (p2_l + ((p0_l + q0_l + const_1_h) >> 1) - (p1_l << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_l, t0);
+ t0 = __msa_min_s_h(tc_l, t0);
+ t1 = p1_l + t0;
+ // q1
+ t0 = (q2_l + ((p0_l + q0_l + const_1_h) >> 1) - (q1_l << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_l, t0);
+ t0 = __msa_min_s_h(tc_l, t0);
+ t2 = q1_l + t0;
+ // iDeta
+ // (delta for p0/q0, clipped to [-iTc, iTc]; uses the unmodified p1/q1)
+ t0 = (((q0_l - p0_l) << 2) + (p1_l - q1_l) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negiTc_l, t0);
+ t0 = __msa_min_s_h(iTc_l, t0);
+ p1_l = t1;
+ q1_l = t2;
+ // p0
+ // Clamp p0 + delta to 0..255: lanes with no bits above bit 7 keep t1; other
+ // lanes take t3, which is all-ones when t1 >= 0 and zero when t1 < 0
+ t1 = p0_l + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ p0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0
+ // (same clamp applied to q0 - delta)
+ t1 = q0_l - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ // Calculate the right part
+ // (identical arithmetic on the *_r halves)
+ // p1
+ t0 = (p2_r + ((p0_r + q0_r + const_1_h) >> 1) - (p1_r << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_r, t0);
+ t0 = __msa_min_s_h(tc_r, t0);
+ t1 = p1_r + t0;
+ // q1
+ t0 = (q2_r + ((p0_r + q0_r + const_1_h) >> 1) - (q1_r << 1)) >> 1;
+ t0 = __msa_max_s_h(negTc_r, t0);
+ t0 = __msa_min_s_h(tc_r, t0);
+ t2 = q1_r + t0;
+ // iDeta
+ t0 = (((q0_r - p0_r) << 2) + (p1_r - q1_r) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negiTc_r, t0);
+ t0 = __msa_min_s_h(iTc_r, t0);
+ p1_r = t1;
+ q1_r = t2;
+ // p0
+ t1 = p0_r + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ p0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0
+ t1 = q0_r - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ // Combined left and right
+ MSA_PCKEV_B4(v8i16, p1_l, p1_r, p0_l, p0_r, q0_l, q0_r, q1_l, q1_r,
+ t0, t1, t2, t3);
+ // Filtered values are used only in lanes where tc >= 0 and f is set;
+ // all other lanes keep the pixels that were loaded
+ flags = (v16i8)__msa_cle_s_b(zero, tc);
+ flags &= f;
+ p0 = (v16u8)(((v16i8)t1 & flags) + (p0 & (~flags)));
+ q0 = (v16u8)(((v16i8)t2 & flags) + (q0 & (~flags)));
+ // Using t1, t2 as temporary flags
+ // (p1 / q1 additionally require the p2 / q2 condition)
+ t1 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaP2P0, zero))));
+ p1 = (v16u8)(t0 & t1) + (p1 & (v16u8)(~t1));
+ t2 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaQ2Q0, zero))));
+ q1 = (v16u8)(t3 & t2) + (q1 & (v16u8)(~t2));
+
+ // Interleave bytes, then halfwords, so each 32-bit word holds one row's
+ // p1, p0, q0, q1 (presumably in that order -- confirm against the macros)
+ MSA_ILVRL_B4(v8i16, p0, p1, q1, q0, t0, t1, t2, t3);
+ MSA_ILVRL_H4(v16u8, t2, t0, t3, t1, p1, p0, q0, q1);
+ // Store data to pPix
+ MSA_ST_W8(p1, p0, 0, 1, 2, 3, 0, 1, 2, 3, pPix - 2, iStride);
+ MSA_ST_W8(q0, q1, 0, 1, 2, 3, 0, 1, 2, 3, pPix + 8 * iStride - 2, iStride);
+}
+
+// MSA luma deblocking across a vertical edge, variant without tc clipping.
+// Reads 16 rows starting at pPix - 4, computes filtered p2..q2 and writes
+// 4 bytes per row at pPix - 3 plus 2 bytes per row at pPix + 1.
+//   pPix    - presumably the q0 pixel of the first row -- TODO confirm
+//   iStride - distance between consecutive rows, passed to the load/store macros
+//   iAlpha  - threshold for |p0 - q0|; (iAlpha >> 2) + 2 gates the stronger filter
+//   iBeta   - threshold for |p1 - p0|, |q1 - q0|, |p2 - p0|, |q2 - q0|
+void DeblockLumaEq4H_msa(uint8_t *pPix, int32_t iStride, int32_t iAlpha,
+ int32_t iBeta) {
+ v16u8 p0, p1, p2, p3, q0, q1, q2, q3;
+ v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, p3_l, p3_r,
+ q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r;
+ v8i16 t0, t1, t2, t0_con1;
+ v8i16 s0, s1, s2, s0_con1;
+ v16u8 alpha, beta;
+ v16u8 iDetaP0Q0, bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0;
+ // Condition mask
+ v16u8 mask0, mask1;
+ v16i8 const_2_b = __msa_ldi_b(2);
+ v8i16 const_2_h = __msa_ldi_h(2);
+ v8i16 const_4_h = __msa_ldi_h(4);
+ v16i8 zero = { 0 };
+
+ // Load data from pPix
+ // (the *_l / *_r variables are only scratch registers for the raw rows here)
+ MSA_LD_V8(v8i16, pPix - 4, iStride, p0_l, p0_r, p1_l, p1_r,
+ p2_l, p2_r, p3_l, p3_r);
+ MSA_LD_V8(v8i16, pPix + 8 * iStride - 4, iStride,
+ q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r);
+ // Transpose 16x8 to 8x16, we just need p0, p1, p2, p3, q0, q1, q2, q3
+ MSA_TRANSPOSE16x8_B(v16u8, p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, p3_l, p3_r,
+ q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+ // iAlpha and beta are uint8_t type
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+
+ // iDetaP0Q0 is not bool type
+ // (kept as a raw absolute difference because it is compared twice below)
+ iDetaP0Q0 = __msa_asub_u_b(p0, q0);
+
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP2P0 = __msa_asub_u_b(p2, p0);
+ bDetaQ2Q0 = __msa_asub_u_b(q2, q0);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+ bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta);
+ bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta);
+
+ // Unsigned extend p0, p1, p2, p3, q0, q1, q2, q3 from 8 bits to 16 bits
+ MSA_ILVRL_B4(v8i16, zero, p0, zero, p1,
+ p0_r, p0_l, p1_r, p1_l);
+ MSA_ILVRL_B4(v8i16, zero, p2, zero, p3,
+ p2_r, p2_l, p3_r, p3_l);
+ MSA_ILVRL_B4(v8i16, zero, q0, zero, q1,
+ q0_r, q0_l, q1_r, q1_l);
+ MSA_ILVRL_B4(v8i16, zero, q2, zero, q3,
+ q2_r, q2_l, q3_r, q3_l);
+
+ // Calculate condition mask
+ // (iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0
+ mask0 = (v16u8)__msa_clt_u_b(iDetaP0Q0, alpha);
+ mask0 &= bDetaP1P0;
+ mask0 &= bDetaQ1Q0;
+ // iDetaP0Q0 < ((iAlpha >> 2) + 2)
+ mask1 = (v16u8)((alpha >> 2) + const_2_b);
+ mask1 = (v16u8)__msa_clt_u_b(iDetaP0Q0, mask1);
+
+ // Calculate the left part
+ // All candidates are computed from the unmodified inputs into temporaries
+ // and only copied back afterwards
+ // p0
+ t0 = (p2_l + (p1_l << 1) + (p0_l << 1) + (q0_l << 1) + q1_l + const_4_h) >> 3;
+ // p1
+ t1 = (p2_l + p1_l + p0_l + q0_l + const_2_h) >> 2;
+ // p2
+ t2 = ((p3_l << 1) + p2_l + (p2_l << 1) + p1_l + p0_l + q0_l + const_4_h) >> 3;
+ // p0 condition 1
+ t0_con1 = ((p1_l << 1) + p0_l + q1_l + const_2_h) >> 2;
+ // q0
+ s0 = (p1_l + (p0_l << 1) + (q0_l << 1) + (q1_l << 1) + q2_l + const_4_h) >> 3;
+ // q1
+ s1 = (p0_l + q0_l + q1_l + q2_l + const_2_h) >> 2;
+ // q2
+ s2 = ((q3_l << 1) + q2_l + (q2_l << 1) + q1_l + q0_l + p0_l + const_4_h) >> 3;
+ // q0 condition 1
+ s0_con1 = ((q1_l << 1) + q0_l + p1_l + const_2_h) >> 2;
+ // Move back
+ p0_l = t0;
+ p1_l = t1;
+ p2_l = t2;
+ q0_l = s0;
+ q1_l = s1;
+ q2_l = s2;
+ // Use p3_l, q3_l as tmp
+ p3_l = t0_con1;
+ q3_l = s0_con1;
+
+ // Calculate the right part
+ // (identical arithmetic on the *_r halves)
+ // p0
+ t0 = (p2_r + (p1_r << 1) + (p0_r << 1) + (q0_r << 1) + q1_r + const_4_h) >> 3;
+ // p1
+ t1 = (p2_r + p1_r + p0_r + q0_r + const_2_h) >> 2;
+ // p2
+ t2 = ((p3_r << 1) + p2_r + (p2_r << 1) + p1_r + p0_r + q0_r + const_4_h) >> 3;
+ // p0 condition 1
+ t0_con1 = ((p1_r << 1) + p0_r + q1_r + const_2_h) >> 2;
+ // q0
+ s0 = (p1_r + (p0_r << 1) + (q0_r << 1) + (q1_r << 1) + q2_r + const_4_h) >> 3;
+ // q1
+ s1 = (p0_r + q0_r + q1_r + q2_r + const_2_h) >> 2;
+ // q2
+ s2 = ((q3_r << 1) + q2_r + (q2_r << 1) + q1_r + q0_r + p0_r + const_4_h) >> 3;
+ // q0 condition 1
+ s0_con1 = ((q1_r << 1) + q0_r + p1_r + const_2_h) >> 2;
+ // Move back
+ p0_r = t0;
+ p1_r = t1;
+ p2_r = t2;
+ q0_r = s0;
+ q1_r = s1;
+ q2_r = s2;
+ // Use p3_r, q3_r as tmp
+ p3_r = t0_con1;
+ q3_r = s0_con1;
+
+ // Combined left and right
+ MSA_PCKEV_B4(v8i16, p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r,
+ t0, t1, t2, s0);
+ MSA_PCKEV_B4(v8i16, q1_l, q1_r, q2_l, q2_r, p3_l, p3_r, q3_l, q3_r,
+ s1, s2, t0_con1, s0_con1);
+ // Per-lane selection (the masked terms are mutually exclusive, so + acts as a blend):
+ //   mask0 && mask1 && bDetaP2P0      -> t0 / t1 / t2 for p0 / p1 / p2
+ //   mask0 && !(mask1 && bDetaP2P0)   -> t0_con1 for p0, p1 and p2 untouched
+ //   !mask0                           -> everything untouched
+ // The q side mirrors this with bDetaQ2Q0.
+ t0 = (v8i16)(((v16u8)t0 & mask0 & mask1 & bDetaP2P0) + ((v16u8)t0_con1 &
+ mask0 & mask1 & (~bDetaP2P0)) + ((v16u8)t0_con1 & mask0 & (~mask1)));
+ t1 = (v8i16)((v16u8)t1 & mask0 & mask1 & bDetaP2P0);
+ t2 = (v8i16)((v16u8)t2 & mask0 & mask1 & bDetaP2P0);
+ s0 = (v8i16)(((v16u8)s0 & mask0 & mask1 & bDetaQ2Q0) + ((v16u8)s0_con1 &
+ mask0 & mask1 & (~bDetaQ2Q0)) + ((v16u8)s0_con1 & mask0 & (~mask1)));
+ s1 = (v8i16)((v16u8)s1 & mask0 & mask1 & bDetaQ2Q0);
+ s2 = (v8i16)((v16u8)s2 & mask0 & mask1 & bDetaQ2Q0);
+ p0 = (v16u8)t0 + (p0 & (~mask0));
+ p1 = (v16u8)t1 + (p1 & ~(mask0 & mask1 & bDetaP2P0));
+ p2 = (v16u8)t2 + (p2 & ~(mask0 & mask1 & bDetaP2P0));
+ q0 = (v16u8)s0 + (q0 & (~mask0));
+ q1 = (v16u8)s1 + (q1 & ~(mask0 & mask1 & bDetaQ2Q0));
+ q2 = (v16u8)s2 + (q2 & ~(mask0 & mask1 & bDetaQ2Q0));
+
+ // Interleave back to row order: 32-bit words presumably hold p2,p1,p0,q0 of
+ // one row and t2/s2 halfwords hold q1,q2 -- confirm against the macros
+ MSA_ILVRL_B4(v8i16, p1, p2, q0, p0, t0, s0, t1, s1);
+ MSA_ILVRL_B2(v8i16, q2, q1, t2, s2);
+ MSA_ILVRL_H4(v16u8, t1, t0, s1, s0, p2, p1, p0, q0);
+ // Store data to pPix
+ MSA_ST_W8(p2, p1, 0, 1, 2, 3, 0, 1, 2, 3, pPix - 3, iStride);
+ MSA_ST_W8(p0, q0, 0, 1, 2, 3, 0, 1, 2, 3, pPix + 8 * iStride - 3, iStride);
+ MSA_ST_H8(t2, 0, 1, 2, 3, 4, 5, 6, 7, pPix + 1, iStride);
+ MSA_ST_H8(s2, 0, 1, 2, 3, 4, 5, 6, 7, pPix + 8 * iStride + 1, iStride);
+}
+
+// MSA chroma deblocking across a horizontal edge, tc-clipped variant.
+// For each of the Cb and Cr planes: loads the four rows p1, p0, q0, q1 starting
+// at pPix - 2 * iStride, filters p0/q0 and stores 8 bytes to the rows at
+// pPix - iStride and pPix.
+//   pPixCb, pPixCr - pointers to the q0 row of each plane (p0 is one row above)
+//   iStride        - distance between consecutive rows
+//   iAlpha         - threshold compared against |p0 - q0|
+//   iBeta          - threshold compared against |p1 - p0| and |q1 - q0|
+//   pTc            - 4 clipping values; pTc[k] is replicated into byte lanes 2k, 2k+1
+void DeblockChromaLt4V_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+ int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
+ v16u8 p0, p1, q0, q1;
+ v8i16 p0_e, p1_e, q0_e, q1_e;
+ v16i8 negTc, flags, f;
+ v8i16 tc_e, negTc_e;
+ // Use for temporary variable
+ v8i16 t0, t1, t2, t3;
+ v16u8 alpha, beta;
+ v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
+ v8i16 const_4_h = __msa_ldi_h(4);
+ v8i16 const_not_255_h = __msa_ldi_h(~255);
+ v16i8 zero = { 0 };
+ // Only the first 8 lanes are initialised explicitly; the remaining lanes are zero
+ v16i8 tc = { pTc[0 >> 1], pTc[1 >> 1], pTc[2 >> 1], pTc[3 >> 1],
+ pTc[4 >> 1], pTc[5 >> 1], pTc[6 >> 1], pTc[7 >> 1] };
+ negTc = zero - tc;
+
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+ // Signed extend tc, negTc from 8 bits to 16 bits
+ flags = __msa_clt_s_b(tc, zero);
+ MSA_ILVR_B(v8i16, flags, tc, tc_e);
+ flags = __msa_clt_s_b(negTc, zero);
+ MSA_ILVR_B(v8i16, flags, negTc, negTc_e);
+
+ // Cb
+ // Load data from pPixCb
+ MSA_LD_V4(v16u8, pPixCb - 2 * iStride, iStride, p1, p0, q0, q1);
+
+ // Absolute differences, then per-lane masks (difference < threshold)
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ // f: lanes where all three edge conditions hold
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // iDeta
+ // (delta clipped to [-tc, tc])
+ t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negTc_e, t0);
+ t0 = __msa_min_s_h(tc_e, t0);
+ // p0
+ // Clamp to 0..255: lanes with no bits above bit 7 keep t1; other lanes take
+ // t3, which is all-ones when t1 >= 0 and zero when t1 < 0
+ t1 = p0_e + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0
+ t1 = q0_e - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ // Filtered values are used only in lanes where tc >= 0 and f is set
+ flags = (v16i8)__msa_cle_s_b(zero, tc);
+ flags &= f;
+ p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags)));
+ q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags)));
+ // Store data to pPixCb
+ MSA_ST_D(p0, 0, pPixCb - iStride);
+ MSA_ST_D(q0, 0, pPixCb);
+
+ // Cr
+ // (same sequence as for Cb, reusing alpha, beta, tc_e and negTc_e)
+ // Load data from pPixCr
+ MSA_LD_V4(v16u8, pPixCr - 2 * iStride, iStride, p1, p0, q0, q1);
+
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // iDeta
+ t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negTc_e, t0);
+ t0 = __msa_min_s_h(tc_e, t0);
+ // p0
+ t1 = p0_e + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0
+ t1 = q0_e - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ flags = (v16i8)__msa_cle_s_b(zero, tc);
+ flags &= f;
+ p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags)));
+ q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags)));
+ // Store data to pPixCr
+ MSA_ST_D(p0, 0, pPixCr - iStride);
+ MSA_ST_D(q0, 0, pPixCr);
+}
+
+// MSA chroma deblocking across a horizontal edge, variant without tc clipping.
+// For each of the Cb and Cr planes: loads the four rows p1, p0, q0, q1 starting
+// at pPix - 2 * iStride, replaces p0/q0 by a rounded weighted average where the
+// edge conditions hold, and stores 8 bytes to the rows at pPix - iStride and pPix.
+//   pPixCb, pPixCr - pointers to the q0 row of each plane (p0 is one row above)
+//   iStride        - distance between consecutive rows
+//   iAlpha         - threshold compared against |p0 - q0|
+//   iBeta          - threshold compared against |p1 - p0| and |q1 - q0|
+void DeblockChromaEq4V_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+ int32_t iAlpha, int32_t iBeta) {
+ v16u8 p0, p1, q0, q1;
+ v8i16 p0_e, p1_e, q0_e, q1_e;
+ v16i8 f;
+ // Use for temporary variable
+ v8i16 t0, t1;
+ v16u8 alpha, beta;
+ v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
+ v8i16 const_2_h = __msa_ldi_h(2);
+ v16i8 zero = { 0 };
+
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+
+ // Cb
+ // Load data from pPixCb
+ MSA_LD_V4(v16u8, pPixCb - 2 * iStride, iStride, p1, p0, q0, q1);
+
+ // Absolute differences, then per-lane masks (difference < threshold)
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ // f: lanes where all three edge conditions hold
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // p0
+ // (q0 below still sees the original q0_e, p1_e and q1_e; only p0_e changed)
+ p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2;
+ // q0
+ q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2;
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ // Keep the loaded pixel in lanes where f is clear
+ p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f)));
+ q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f)));
+ // Store data to pPixCb
+ MSA_ST_D(p0, 0, pPixCb - iStride);
+ MSA_ST_D(q0, 0, pPixCb);
+
+ // Cr
+ // (same sequence as for Cb)
+ // Load data from pPixCr
+ MSA_LD_V4(v16u8, pPixCr - 2 * iStride, iStride, p1, p0, q0, q1);
+
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // p0
+ p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2;
+ // q0
+ q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2;
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f)));
+ q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f)));
+ // Store data to pPixCr
+ MSA_ST_D(p0, 0, pPixCr - iStride);
+ MSA_ST_D(q0, 0, pPixCr);
+}
+
+// MSA chroma deblocking across a vertical edge, tc-clipped variant.
+// For each of the Cb and Cr planes: loads 8 rows starting at pPix - 2,
+// transposes them into p1/p0/q0/q1 vectors, filters p0/q0 and stores 2 bytes
+// per row at pPix - 1.
+//   pPixCb, pPixCr - presumably the q0 pixel of the first row of each plane
+//                    (the store begins one byte to its left) -- TODO confirm
+//   iStride        - distance between consecutive rows
+//   iAlpha         - threshold compared against |p0 - q0|
+//   iBeta          - threshold compared against |p1 - p0| and |q1 - q0|
+//   pTc            - 4 clipping values; pTc[k] is replicated into byte lanes 2k, 2k+1
+void DeblockChromaLt4H_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+ int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
+ v16u8 p0, p1, q0, q1;
+ v8i16 p0_e, p1_e, q0_e, q1_e;
+ v16i8 negTc, flags, f;
+ v8i16 tc_e, negTc_e;
+ // Use for temporary variable
+ v8i16 t0, t1, t2, t3;
+ v16u8 alpha, beta;
+ v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
+ v8i16 const_4_h = __msa_ldi_h(4);
+ v8i16 const_not_255_h = __msa_ldi_h(~255);
+ v16i8 zero = { 0 };
+ // Only the first 8 lanes are initialised explicitly; the remaining lanes are zero
+ v16i8 tc = { pTc[0 >> 1], pTc[1 >> 1], pTc[2 >> 1], pTc[3 >> 1],
+ pTc[4 >> 1], pTc[5 >> 1], pTc[6 >> 1], pTc[7 >> 1] };
+ negTc = zero - tc;
+
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+ // Signed extend tc, negTc from 8 bits to 16 bits
+ flags = __msa_clt_s_b(tc, zero);
+ MSA_ILVR_B(v8i16, flags, tc, tc_e);
+ flags = __msa_clt_s_b(negTc, zero);
+ MSA_ILVR_B(v8i16, flags, negTc, negTc_e);
+
+ // Cb
+ // Load data from pPixCb
+ // (p*_e, q*_e and t0..t3 are only scratch registers for the raw rows here)
+ MSA_LD_V8(v8i16, pPixCb - 2, iStride, p1_e, p0_e, q0_e, q1_e,
+ t0, t1, t2, t3);
+ // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1
+ MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3,
+ p1, p0, q0, q1);
+
+ // Absolute differences, then per-lane masks (difference < threshold)
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ // f: lanes where all three edge conditions hold
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // iDeta
+ // (delta clipped to [-tc, tc])
+ t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negTc_e, t0);
+ t0 = __msa_min_s_h(tc_e, t0);
+ // p0
+ // Clamp to 0..255: lanes with no bits above bit 7 keep t1; other lanes take
+ // t3, which is all-ones when t1 >= 0 and zero when t1 < 0
+ t1 = p0_e + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0
+ t1 = q0_e - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ // Filtered values are used only in lanes where tc >= 0 and f is set
+ flags = (v16i8)__msa_cle_s_b(zero, tc);
+ flags &= f;
+ p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags)));
+ q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags)));
+ // Store data to pPixCb
+ // (p0 and q0 are interleaved so each halfword carries one row's pair)
+ MSA_ILVR_B(v16u8, q0, p0, p0);
+ MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCb - 1, iStride);
+
+ // Cr
+ // (same sequence as for Cb, reusing alpha, beta, tc_e and negTc_e)
+ // Load data from pPixCr
+ MSA_LD_V8(v8i16, pPixCr - 2, iStride, p1_e, p0_e, q0_e, q1_e,
+ t0, t1, t2, t3);
+ // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1
+ MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3,
+ p1, p0, q0, q1);
+
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // iDeta
+ t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + const_4_h) >> 3;
+ t0 = __msa_max_s_h(negTc_e, t0);
+ t0 = __msa_min_s_h(tc_e, t0);
+ // p0
+ t1 = p0_e + t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+ // q0
+ t1 = q0_e - t0;
+ t2 = t1 & const_not_255_h;
+ t3 = __msa_cle_s_h((v8i16)zero, t1);
+ flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero);
+ q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags));
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ flags = (v16i8)__msa_cle_s_b(zero, tc);
+ flags &= f;
+ p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags)));
+ q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags)));
+ // Store data to pPixCr
+ MSA_ILVR_B(v16u8, q0, p0, p0);
+ MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCr - 1, iStride);
+}
+
+// MSA chroma deblocking across a vertical edge, variant without tc clipping.
+// For each of the Cb and Cr planes: loads 8 rows starting at pPix - 2,
+// transposes them into p1/p0/q0/q1 vectors, replaces p0/q0 by a rounded
+// weighted average where the edge conditions hold, and stores 2 bytes per row
+// at pPix - 1.
+//   pPixCb, pPixCr - presumably the q0 pixel of the first row of each plane
+//                    (the store begins one byte to its left) -- TODO confirm
+//   iStride        - distance between consecutive rows
+//   iAlpha         - threshold compared against |p0 - q0|
+//   iBeta          - threshold compared against |p1 - p0| and |q1 - q0|
+void DeblockChromaEq4H_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+ int32_t iAlpha, int32_t iBeta) {
+ v16u8 p0, p1, q0, q1;
+ v8i16 p0_e, p1_e, q0_e, q1_e;
+ v16i8 f;
+ // Use for temporary variable
+ // (t2 and t3 only receive raw rows from the loads below)
+ v8i16 t0, t1, t2, t3;
+ v16u8 alpha, beta;
+ v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
+ v8i16 const_2_h = __msa_ldi_h(2);
+ v16i8 zero = { 0 };
+
+ alpha = (v16u8)__msa_fill_b(iAlpha);
+ beta = (v16u8)__msa_fill_b(iBeta);
+
+ // Cb
+ // Load data from pPixCb
+ MSA_LD_V8(v8i16, pPixCb - 2, iStride, p1_e, p0_e, q0_e, q1_e,
+ t0, t1, t2, t3);
+ // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1
+ MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3,
+ p1, p0, q0, q1);
+
+ // Absolute differences, then per-lane masks (difference < threshold)
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ // f: lanes where all three edge conditions hold
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // p0
+ // (q0 below still sees the original q0_e, p1_e and q1_e; only p0_e changed)
+ p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2;
+ // q0
+ q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2;
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ // Keep the loaded pixel in lanes where f is clear
+ p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f)));
+ q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f)));
+ // Store data to pPixCb
+ // (p0 and q0 are interleaved so each halfword carries one row's pair)
+ MSA_ILVR_B(v16u8, q0, p0, p0);
+ MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCb - 1, iStride);
+
+ // Cr
+ // (same sequence as for Cb)
+ // Load data from pPixCr
+ MSA_LD_V8(v8i16, pPixCr - 2, iStride, p1_e, p0_e, q0_e, q1_e,
+ t0, t1, t2, t3);
+ // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1
+ MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3,
+ p1, p0, q0, q1);
+
+ bDetaP0Q0 = __msa_asub_u_b(p0, q0);
+ bDetaP1P0 = __msa_asub_u_b(p1, p0);
+ bDetaQ1Q0 = __msa_asub_u_b(q1, q0);
+ bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha);
+ bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta);
+ bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta);
+
+ // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits
+ MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1,
+ p0_e, p1_e, q0_e, q1_e);
+
+ f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0;
+
+ // p0
+ p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2;
+ // q0
+ q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2;
+
+ MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1);
+ p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f)));
+ q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f)));
+ // Store data to pPixCr
+ MSA_ILVR_B(v16u8, q0, p0, p0);
+ MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCr - 1, iStride);
+}
+
+// Normalises the counts at pNonZeroCount in place: every zero byte stays 0 and
+// every non-zero byte becomes 1. A full vector is written back at offset 0 and
+// one doubleword at offset 16.
+// NOTE(review): the load requests two vectors (offsets 0 and 16) while only
+// 16 + 8 bytes are stored -- confirm the buffer has 32 readable bytes.
+void WelsNonZeroCount_msa(int8_t* pNonZeroCount) {
+  const v16u8 kAllZero = { 0 };
+  const v16u8 kOne = (v16u8)__msa_fill_b(0x01);
+  v16u8 lo, hi;
+
+  MSA_LD_V2(v16u8, pNonZeroCount, 16, lo, hi);
+  // ceq yields 0xFF for zero bytes and 0x00 otherwise; adding 1 wraps 0xFF to 0
+  // and turns 0x00 into 1.
+  lo = (v16u8)__msa_ceq_b((v16i8)kAllZero, (v16i8)lo);
+  lo = lo + kOne;
+  hi = (v16u8)__msa_ceq_b((v16i8)kAllZero, (v16i8)hi);
+  hi = hi + kOne;
+  MSA_ST_V(v16u8, lo, pNonZeroCount);
+  MSA_ST_D(hi, 0, pNonZeroCount + 16);
+}
diff --git a/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp b/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp
index 8aa67f11659..4477e34e34c 100644
--- a/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp
+++ b/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp
@@ -71,18 +71,6 @@
#if defined(_WIN32) || defined(__CYGWIN__)
-#ifdef WINAPI_FAMILY
-#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
-#define WP80
-
-#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
-#define GetSystemInfo(x) GetNativeSystemInfo(x)
-#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS)
-#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
-#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE)
-#endif
-#endif
-
WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex) {
InitializeCriticalSection (mutex);
diff --git a/chromium/third_party/openh264/src/codec/common/src/cpu.cpp b/chromium/third_party/openh264/src/codec/common/src/cpu.cpp
index a39fd064578..fb5d3dae4c8 100644
--- a/chromium/third_party/openh264/src/codec/common/src/cpu.cpp
+++ b/chromium/third_party/openh264/src/codec/common/src/cpu.cpp
@@ -309,12 +309,45 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
#elif defined(mips)
/* for loongson */
+// Derives WELS_CPU_* feature flags from /proc/cpuinfo (Linux only; returns 0
+// elsewhere or when the file cannot be opened).
+//
+// Two lines are examined, each at its first occurrence:
+//   "model name"       - sets WELS_CPU_MMI for Loongson-3A / -3B / -2K
+//   "ASEs implemented" - sets WELS_CPU_MMI when both loongson-mmi and
+//                        loongson-ext are listed, WELS_CPU_MSA when msa is listed
+//
+// Both lines are matched in a single pass. Scanning for "model name" first and
+// only then for "ASEs implemented" on the same stream would run to EOF when no
+// "model name" line exists, so the ASE flags would never be seen.
+static uint32_t get_cpu_flags_from_cpuinfo(void)
+{
+  uint32_t flags = 0;
+
+# ifdef __linux__
+  FILE* fp = fopen("/proc/cpuinfo", "r");
+  if (!fp)
+    return flags;
+
+  bool seen_model = false;
+  bool seen_ases = false;
+  char buf[200];
+  memset(buf, 0, sizeof(buf));
+  // Stop as soon as both lines have been handled once.
+  while (!(seen_model && seen_ases) && fgets(buf, sizeof(buf), fp)) {
+    if (!seen_model && !strncmp(buf, "model name", strlen("model name"))) {
+      if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") ||
+          strstr(buf, "Loongson-2K")) {
+        flags |= WELS_CPU_MMI;
+      }
+      seen_model = true;
+    } else if (!seen_ases &&
+               !strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) {
+      if (strstr(buf, "loongson-mmi") && strstr(buf, "loongson-ext")) {
+        flags |= WELS_CPU_MMI;
+      }
+      if (strstr(buf, "msa")) {
+        flags |= WELS_CPU_MSA;
+      }
+      seen_ases = true;
+    }
+  }
+  fclose(fp);
+# endif
+
+  return flags;
+}
+
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
-#if defined(HAVE_MMI)
- return WELS_CPU_MMI;
-#else
- return 0;
-#endif
+ // Flags now come from the cpuinfo-based probe instead of the compile-time
+ // HAVE_MMI switch; pNumberOfLogicProcessors is not written on this path.
+ return get_cpu_flags_from_cpuinfo();
}
#else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */
@@ -324,5 +357,3 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
}
#endif
-
-
diff --git a/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp b/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp
index 65ffb3fddc2..fcd96540e92 100644
--- a/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp
+++ b/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp
@@ -33,6 +33,240 @@
#include "expand_pic.h"
#include "cpu_core.h"
+// Pads above and to the upper-left of pDst: each row above pDst receives a copy
+// of the 16 bytes at pDst, and the PADDING_LENGTH bytes to the left of that
+// copy are filled with pDst[0]. One row is written per loop pass, with
+// PADDING_LENGTH bounding the pass count.
+static inline void MBPadTopLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride) {
+  const uint8_t kuiCorner = *pDst;
+  uint8_t* pRow = pDst;
+  int32_t iRowsLeft = PADDING_LENGTH;
+  do {
+    pRow -= kiStride; // step one row up
+    memcpy (pRow, pDst, 16); // confirmed_safe_unsafe_usage
+    memset (pRow - PADDING_LENGTH, kuiCorner, PADDING_LENGTH); // left of the copy
+  } while (--iRowsLeft > 0);
+}
+
+// Copies the 16 bytes at pDst + 16 * kiMbX into the same columns of each row
+// above it. One row is written per loop pass, with PADDING_LENGTH bounding the
+// pass count.
+static inline void MBPadTopLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) {
+  uint8_t* const pSrcRow = pDst + (kiMbX << 4);
+  uint8_t* pRow = pSrcRow;
+  int32_t iRowsLeft = PADDING_LENGTH;
+  do {
+    pRow -= kiStride; // step one row up
+    memcpy (pRow, pSrcRow, 16); // confirmed_safe_unsafe_usage
+  } while (--iRowsLeft > 0);
+}
+
+// Copies 16 bytes from row (kiPicH - 1), column 16 * kiMbX, into the same
+// columns of each row below it. One row is written per loop pass, with
+// PADDING_LENGTH bounding the pass count.
+static inline void MBPadBottomLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX,
+    const int32_t& kiPicH) {
+  uint8_t* const pSrcRow = pDst + (kiPicH - 1) * kiStride + (kiMbX << 4);
+  uint8_t* pRow = pSrcRow;
+  int32_t iRowsLeft = PADDING_LENGTH;
+  do {
+    pRow += kiStride; // step one row down
+    memcpy (pRow, pSrcRow, 16); // confirmed_safe_unsafe_usage
+  } while (--iRowsLeft > 0);
+}
+
+// Pads above and to the upper-right of the row at pDst: each row above receives
+// a copy of the 16 bytes that end at column kiPicW, followed by PADDING_LENGTH
+// bytes holding the value at column kiPicW - 1. One row is written per loop
+// pass, with PADDING_LENGTH bounding the pass count.
+static inline void MBPadTopRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) {
+  uint8_t* const pRowEnd = pDst + kiPicW;
+  const uint8_t kuiEdge = * (pRowEnd - 1);
+  uint8_t* pOut = pRowEnd;
+  int32_t iRowsLeft = PADDING_LENGTH;
+  do {
+    pOut -= kiStride; // step one row up
+    memcpy (pOut - 16, pRowEnd - 16, 16); // confirmed_safe_unsafe_usage
+    memset (pOut, kuiEdge, PADDING_LENGTH); // right of the copy
+  } while (--iRowsLeft > 0);
+}
+
+// Pads below and to the lower-left of row (kiPicH - 1): each row below receives
+// a copy of that row's first 16 bytes, and the PADDING_LENGTH bytes to the left
+// of the copy are filled with that row's first byte. One row is written per
+// loop pass, with PADDING_LENGTH bounding the pass count.
+static inline void MBPadBottomLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) {
+  uint8_t* const pSrcRow = pDst + (kiPicH - 1) * kiStride;
+  const uint8_t kuiCorner = *pSrcRow;
+  uint8_t* pRow = pSrcRow;
+  int32_t iRowsLeft = PADDING_LENGTH;
+  do {
+    pRow += kiStride; // step one row down
+    memcpy (pRow, pSrcRow, 16); // confirmed_safe_unsafe_usage
+    memset (pRow - PADDING_LENGTH, kuiCorner, PADDING_LENGTH); // left of the copy
+  } while (--iRowsLeft > 0);
+}
+
+// Pads below and to the lower-right of row (kiPicH - 1): each row below
+// receives a copy of the 16 bytes that end at column kiPicW, followed by
+// PADDING_LENGTH bytes holding the value at column kiPicW - 1. One row is
+// written per loop pass, with PADDING_LENGTH bounding the pass count.
+static inline void MBPadBottomRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW,
+    const int32_t& kiPicH) {
+  uint8_t* const pRowEnd = pDst + (kiPicH - 1) * kiStride + kiPicW;
+  const uint8_t kuiEdge = * (pRowEnd - 1);
+  uint8_t* pOut = pRowEnd;
+  int32_t iRowsLeft = PADDING_LENGTH;
+  do {
+    pOut += kiStride; // step one row down
+    memcpy (pOut - 16, pRowEnd - 16, 16); // confirmed_safe_unsafe_usage
+    memset (pOut, kuiEdge, PADDING_LENGTH); // right of the copy
+  } while (--iRowsLeft > 0);
+}
+
+// For the 16 rows starting at row 16 * kiMbY, fills the PADDING_LENGTH bytes to
+// the left of each row with that row's first byte.
+static inline void MBPadLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) {
+  uint8_t* pRow = pDst + (kiMbY << 4) * kiStride;
+  for (int32_t iRowsLeft = 16; iRowsLeft > 0; --iRowsLeft, pRow += kiStride) {
+    memset (pRow - PADDING_LENGTH, *pRow, PADDING_LENGTH);
+  }
+}
+
+// For the 16 rows starting at row 16 * kiMbY, fills the PADDING_LENGTH bytes
+// from column kiPicW onwards with the byte at column kiPicW - 1 of that row.
+static inline void MBPadRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY,
+    const int32_t& kiPicW) {
+  uint8_t* pRowEnd = pDst + (kiMbY << 4) * kiStride + kiPicW;
+  for (int32_t iRowsLeft = 16; iRowsLeft > 0; --iRowsLeft, pRowEnd += kiStride) {
+    memset (pRowEnd, * (pRowEnd - 1), PADDING_LENGTH);
+  }
+}
+
+// Copies the 8 bytes at pDst + 8 * kiMbX into the same columns of each row
+// above it. One row is written per loop pass, with CHROMA_PADDING_LENGTH
+// bounding the pass count.
+static inline void MBPadTopChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) {
+  uint8_t* const pSrcRow = pDst + (kiMbX << 3);
+  uint8_t* pRow = pSrcRow;
+  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
+  do {
+    pRow -= kiStride; // step one row up
+    memcpy (pRow, pSrcRow, 8); // confirmed_safe_unsafe_usage
+  } while (--iRowsLeft > 0);
+}
+
+// Copies 8 bytes from row (kiPicH - 1), column 8 * kiMbX, into the same columns
+// of each row below it. One row is written per loop pass, with
+// CHROMA_PADDING_LENGTH bounding the pass count.
+static inline void MBPadBottomChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX,
+    const int32_t& kiPicH) {
+  uint8_t* const pSrcRow = pDst + (kiPicH - 1) * kiStride + (kiMbX << 3);
+  uint8_t* pRow = pSrcRow;
+  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
+  do {
+    pRow += kiStride; // step one row down
+    memcpy (pRow, pSrcRow, 8); // confirmed_safe_unsafe_usage
+  } while (--iRowsLeft > 0);
+}
+
+// Pads above and to the upper-left of pDst: each row above pDst receives a copy
+// of the 8 bytes at pDst, and the CHROMA_PADDING_LENGTH bytes to the left of
+// that copy are filled with pDst[0]. One row is written per loop pass, with
+// CHROMA_PADDING_LENGTH bounding the pass count.
+static inline void MBPadTopLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride) {
+  const uint8_t kuiCorner = *pDst;
+  uint8_t* pRow = pDst;
+  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
+  do {
+    pRow -= kiStride; // step one row up
+    memcpy (pRow, pDst, 8); // confirmed_safe_unsafe_usage
+    memset (pRow - CHROMA_PADDING_LENGTH, kuiCorner, CHROMA_PADDING_LENGTH); // left of the copy
+  } while (--iRowsLeft > 0);
+}
+
+// Pads above and to the upper-right of the row at pDst: each row above receives
+// a copy of the 8 bytes that end at column kiPicW, followed by
+// CHROMA_PADDING_LENGTH bytes holding the value at column kiPicW - 1. One row
+// is written per loop pass, with CHROMA_PADDING_LENGTH bounding the pass count.
+static inline void MBPadTopRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) {
+  uint8_t* const pRowEnd = pDst + kiPicW;
+  const uint8_t kuiEdge = * (pRowEnd - 1);
+  uint8_t* pOut = pRowEnd;
+  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
+  do {
+    pOut -= kiStride; // step one row up
+    memcpy (pOut - 8, pRowEnd - 8, 8); // confirmed_safe_unsafe_usage
+    memset (pOut, kuiEdge, CHROMA_PADDING_LENGTH); // right of the copy
+  } while (--iRowsLeft > 0);
+}
+
+// For each of the rows following row (kiPicH - 1) (at least one, CHROMA_PADDING_LENGTH in
+// total): copies the first 8 bytes of row (kiPicH - 1) into the row, and fills the
+// CHROMA_PADDING_LENGTH bytes to the left of it with that row's first byte.
+static inline void MBPadBottomLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) {
+  uint8_t* const pSrcRow = pDst + (kiPicH - 1) * kiStride;
+  const uint8_t kuiCorner = *pSrcRow; // sampled once, before anything is written
+  uint8_t* pPadRow = pSrcRow;
+  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
+  do {
+    pPadRow += kiStride;
+    memcpy (pPadRow, pSrcRow, 8); // confirmed_safe_unsafe_usage
+    memset (pPadRow - CHROMA_PADDING_LENGTH, kuiCorner, CHROMA_PADDING_LENGTH);
+  } while (--iRowsLeft > 0);
+}
+
+// For each of the rows following row (kiPicH - 1) (at least one, CHROMA_PADDING_LENGTH in
+// total): copies the 8 bytes that end at column kiPicW of row (kiPicH - 1) into the row, and
+// fills the CHROMA_PADDING_LENGTH bytes starting at column kiPicW with the byte at column
+// kiPicW - 1 of row (kiPicH - 1).
+static inline void MBPadBottomRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW,
+                                             const int32_t kiPicH) {
+  uint8_t* const pSrcRowEnd = pDst + (kiPicH - 1) * kiStride + kiPicW;
+  const uint8_t kuiCorner = * (pSrcRowEnd - 1); // sampled once, before anything is written
+  uint8_t* pPadRowEnd = pSrcRowEnd;
+  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
+  do {
+    pPadRowEnd += kiStride;
+    memcpy (pPadRowEnd - 8, pSrcRowEnd - 8, 8); // confirmed_safe_unsafe_usage
+    memset (pPadRowEnd, kuiCorner, CHROMA_PADDING_LENGTH);
+  } while (--iRowsLeft > 0);
+}
+
+// For each of the 8 rows of chroma macroblock row kiMbY, copies the row's first byte
+// into the CHROMA_PADDING_LENGTH bytes that immediately precede it.
+static inline void MBPadLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) {
+  uint8_t* pRow = pDst + (kiMbY << 3) * kiStride;
+  int32_t iRowsLeft = 8;
+  while (iRowsLeft-- > 0) {
+    memset (pRow - CHROMA_PADDING_LENGTH, *pRow, CHROMA_PADDING_LENGTH);
+    pRow += kiStride;
+  }
+}
+
+// For each of the 8 rows of chroma macroblock row kiMbY, fills the CHROMA_PADDING_LENGTH
+// bytes starting at column kiPicW with the value of the byte just before that column.
+static inline void MBPadRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY,
+                                       const int32_t& kiPicW) {
+  uint8_t* pRowEnd = pDst + (kiMbY << 3) * kiStride + kiPicW;
+  int32_t iRowsLeft = 8;
+  while (iRowsLeft-- > 0) {
+    memset (pRowEnd, * (pRowEnd - 1), CHROMA_PADDING_LENGTH);
+    pRowEnd += kiStride;
+  }
+}
+
+// Dispatches luma border padding for the macroblock at (kiMbX, kiMbY) of a
+// kiMBWidth x kiMBHeight macroblock grid:
+//  - first/last column: the matching corner helper (when also in the first/last row),
+//    followed by the left/right helper;
+//  - columns strictly between first and last: the top/bottom helper when in the first/last row.
+// The first column takes precedence over the last one, and the first row over the last one.
+void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
+                  const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) {
+  const bool bFirstCol = (kiMbX == 0);
+  const bool bLastCol = (kiMbX == kiMBWidth - 1);
+  const bool bFirstRow = (kiMbY == 0);
+  const bool bLastRow = (kiMbY == kiMBHeight - 1);
+  const bool bInnerCol = (kiMbX > 0 && kiMbX < kiMBWidth - 1);
+
+  if (bFirstCol) {
+    if (bFirstRow) {
+      MBPadTopLeftLuma_c (pDst, kiStride);
+    } else if (bLastRow) {
+      MBPadBottomLeftLuma_c (pDst, kiStride, kiPicH);
+    }
+    MBPadLeftLuma_c (pDst, kiStride, kiMbY);
+  } else if (bLastCol) {
+    if (bFirstRow) {
+      MBPadTopRightLuma_c (pDst, kiStride, kiPicW);
+    } else if (bLastRow) {
+      MBPadBottomRightLuma_c (pDst, kiStride, kiPicW, kiPicH);
+    }
+    MBPadRightLuma_c (pDst, kiStride, kiMbY, kiPicW);
+  }
+
+  if (bInnerCol) {
+    if (bFirstRow) {
+      MBPadTopLuma_c (pDst, kiStride, kiMbX);
+    } else if (bLastRow) {
+      MBPadBottomLuma_c (pDst, kiStride, kiMbX, kiPicH);
+    }
+  }
+}
+
+// Dispatches chroma border padding for the macroblock at (kiMbX, kiMbY) of a
+// kiMBWidth x kiMBHeight macroblock grid:
+//  - first/last column: the matching corner helper (when also in the first/last row),
+//    followed by the left/right helper;
+//  - columns strictly between first and last: the top/bottom helper when in the first/last row.
+// The first column takes precedence over the last one, and the first row over the last one.
+void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
+                    const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) {
+  const bool bFirstCol = (kiMbX == 0);
+  const bool bLastCol = (kiMbX == kiMBWidth - 1);
+  const bool bFirstRow = (kiMbY == 0);
+  const bool bLastRow = (kiMbY == kiMBHeight - 1);
+  const bool bInnerCol = (kiMbX > 0 && kiMbX < kiMBWidth - 1);
+
+  if (bFirstCol) {
+    if (bFirstRow) {
+      MBPadTopLeftChroma_c (pDst, kiStride);
+    } else if (bLastRow) {
+      MBPadBottomLeftChroma_c (pDst, kiStride, kiPicH);
+    }
+    MBPadLeftChroma_c (pDst, kiStride, kiMbY);
+  } else if (bLastCol) {
+    if (bFirstRow) {
+      MBPadTopRightChroma_c (pDst, kiStride, kiPicW);
+    } else if (bLastRow) {
+      MBPadBottomRightChroma_c (pDst, kiStride, kiPicW, kiPicH);
+    }
+    MBPadRightChroma_c (pDst, kiStride, kiMbY, kiPicW);
+  }
+
+  if (bInnerCol) {
+    if (bFirstRow) {
+      MBPadTopChroma_c (pDst, kiStride, kiMbX);
+    } else if (bLastRow) {
+      MBPadBottomChroma_c (pDst, kiStride, kiMbX, kiPicH);
+    }
+  }
+}
+
// rewrite it (split into luma & chroma) that is helpful for mmx/sse2 optimization perform, 9/27/2009
static inline void ExpandPictureLuma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
const int32_t kiPicH) {
diff --git a/chromium/third_party/openh264/src/codec/common/src/utils.cpp b/chromium/third_party/openh264/src/codec/common/src/utils.cpp
index 2edd73a84e5..fc0fbf90470 100644
--- a/chromium/third_party/openh264/src/codec/common/src/utils.cpp
+++ b/chromium/third_party/openh264/src/codec/common/src/utils.cpp
@@ -76,7 +76,7 @@ void WelsLog (SLogContext* logCtx, int32_t iLevel, const char* kpFmt, ...) {
#ifndef CALC_PSNR
#define CONST_FACTOR_PSNR (10.0 / log(10.0)) // for good computation
-#define CALC_PSNR(w, h, s) ((float)(CONST_FACTOR_PSNR * log( 65025.0 * w * h / iSqe )))
+// CALC_PSNR(w, h, s): CONST_FACTOR_PSNR * log (65025 * w * h / s) as a float (65025 == 255 * 255).
+// Each argument is parenthesized so that expression arguments (e.g. "a + b") expand with the intended precedence.
+#define CALC_PSNR(w, h, s) ((float)(CONST_FACTOR_PSNR * log( 65025.0 * (w) * (h) / (s) )))
#endif//CALC_PSNR
/*
diff --git a/chromium/third_party/openh264/src/codec/common/targets.mk b/chromium/third_party/openh264/src/codec/common/targets.mk
index 96843cd9df0..43de4ce4cf4 100644
--- a/chromium/third_party/openh264/src/codec/common/targets.mk
+++ b/chromium/third_party/openh264/src/codec/common/targets.mk
@@ -66,18 +66,30 @@ COMMON_OBJS += $(COMMON_OBJSARM64)
endif
OBJS += $(COMMON_OBJSARM64)
-COMMON_ASM_MIPS_SRCS=\
+COMMON_ASM_MIPS_MMI_SRCS=\
$(COMMON_SRCDIR)/mips/copy_mb_mmi.c\
$(COMMON_SRCDIR)/mips/deblock_mmi.c\
$(COMMON_SRCDIR)/mips/expand_picture_mmi.c\
$(COMMON_SRCDIR)/mips/intra_pred_com_mmi.c\
$(COMMON_SRCDIR)/mips/satd_sad_mmi.c\
-COMMON_OBJSMIPS += $(COMMON_ASM_MIPS_SRCS:.c=.$(OBJ))
+COMMON_OBJSMIPS_MMI += $(COMMON_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+COMMON_ASM_MIPS_MSA_SRCS=\
+ $(COMMON_SRCDIR)/mips/copy_mb_msa.c\
+ $(COMMON_SRCDIR)/mips/deblock_msa.c\
+
+COMMON_OBJSMIPS_MSA += $(COMMON_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
-COMMON_OBJS += $(COMMON_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+COMMON_OBJS += $(COMMON_OBJSMIPS_MMI)
+endif
+ifeq ($(ENABLE_MSA), Yes)
+COMMON_OBJS += $(COMMON_OBJSMIPS_MSA)
+endif
endif
-OBJS += $(COMMON_OBJSMIPS)
+OBJS += $(COMMON_OBJSMIPS_MMI)
+OBJS += $(COMMON_OBJSMIPS_MSA)
OBJS += $(COMMON_OBJS)
diff --git a/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm b/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm
index 11253ce7c7f..56366fb6299 100644
--- a/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm
+++ b/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm
@@ -485,15 +485,20 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-
%endmacro
%macro WELS_EXTERN 1
- %ifndef WELS_PRIVATE_EXTERN
- %define WELS_PRIVATE_EXTERN
- %endif
ALIGN 16, nop
%ifdef PREFIX
- global _%1 WELS_PRIVATE_EXTERN
+ %ifdef WELS_PRIVATE_EXTERN
+ global _%1: WELS_PRIVATE_EXTERN
+ %else
+ global _%1
+ %endif
%define %1 _%1
%else
- global %1 WELS_PRIVATE_EXTERN
+ %ifdef WELS_PRIVATE_EXTERN
+ global %1: WELS_PRIVATE_EXTERN
+ %else
+ global %1
+ %endif
%endif
%1:
%endmacro
diff --git a/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp b/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp
index 88223dce772..0b59ec105a7 100644
--- a/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp
+++ b/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp
@@ -52,7 +52,6 @@
#include "measure_time.h"
#include "d3d9_utils.h"
-
using namespace std;
#if defined (WINDOWS_PHONE)
@@ -69,6 +68,105 @@ int g_iDecodedFrameNum = 0;
#endif
//using namespace WelsDec;
+// Scans the start-code delimited byte stream pBuf[bufPos .. iFileSize) and returns how many
+// bytes, counted from bufPos, should be handed to the decoder as one chunk.
+//
+// A NAL unit is recognized by a 00 00 00 01 or 00 00 01 start code; its type is the low five
+// bits of the byte that follows the start code. The scan stops (returning the offset of the
+// start code just found) when:
+//   - a type 1 or type 5 NAL is found after one slice NAL (type 1 or 5) was already seen, or
+//   - the first type 7 NAL is found after a slice NAL was already seen.
+// If no such boundary exists, or fewer than 4 bytes are available, all remaining bytes
+// (iFileSize - bufPos) are returned.
+//
+// Outputs (left untouched when fewer than 4 bytes are available):
+//   pSpsBuf        - first payload byte of the most recent type 7 NAL seen, or NULL if none.
+//   sps_byte_count - distance from pSpsBuf to the start code of the first type 8 NAL, set only
+//                    when that NAL is found while exactly one type 7 NAL has been seen; else 0.
+int32_t readPicture (uint8_t* pBuf, const int32_t& iFileSize, const int32_t& bufPos, uint8_t*& pSpsBuf,
+                     int32_t& sps_byte_count) {
+  int32_t bytes_available = iFileSize - bufPos;
+  if (bytes_available < 4) {
+    return bytes_available;
+  }
+  uint8_t* ptr = pBuf + bufPos;
+  int32_t read_bytes = 0;
+  int32_t sps_count = 0;
+  int32_t pps_count = 0;
+  int32_t non_idr_pict_count = 0;
+  int32_t idr_pict_count = 0;
+  pSpsBuf = NULL;
+  sps_byte_count = 0;
+  // The loop bound keeps ptr[4] (the NAL header after a 4-byte start code) inside the buffer.
+  while (read_bytes < bytes_available - 4) {
+    bool has4ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 1;
+    bool has3ByteStartCode = false;
+    if (!has4ByteStartCode) {
+      has3ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 1;
+    }
+    if (has4ByteStartCode || has3ByteStartCode) {
+      uint8_t nal_unit_type = has4ByteStartCode ? (ptr[4] & 0x1F) : (ptr[3] & 0x1F);
+      if (nal_unit_type == 1) {
+        if (++non_idr_pict_count == 1 && idr_pict_count == 1) {
+          return read_bytes;
+        }
+        if (non_idr_pict_count == 2) {
+          return read_bytes;
+        }
+      } else if (nal_unit_type == 5) {
+        if (++idr_pict_count == 1 && non_idr_pict_count == 1) {
+          return read_bytes;
+        }
+        if (idr_pict_count == 2) {
+          return read_bytes;
+        }
+      } else if (nal_unit_type == 7) {
+        pSpsBuf = ptr + (has4ByteStartCode ? 4 : 3);
+        if ((++sps_count == 1) && (non_idr_pict_count == 1 || idr_pict_count == 1)) {
+          return read_bytes;
+        }
+      } else if (nal_unit_type == 8) {
+        if (++pps_count == 1 && sps_count == 1) {
+          sps_byte_count = int32_t (ptr - pSpsBuf);
+        }
+      }
+      // Skip past the start code; a 3-byte start code is skipped by 4 bytes as well.
+      read_bytes += 4;
+      ptr += 4;
+    } else {
+      ++ptr;
+      ++read_bytes;
+    }
+  }
+  return bytes_available;
+}
+
+// Asks the decoder how many frames it still buffers
+// (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER) and calls FlushFrame() that many times.
+// Every frame reported with iBufferStatus == 1 is written to pYuvFile through CUtils::Process.
+//
+//   iTotal               - accumulates the WelsTime() delta spent in each FlushFrame() call.
+//   pOptionFile          - when non-NULL, receives (frame index, width, height) records when the
+//                          output size differs from iLastWidth/iLastHeight.
+//   iFrameCount          - incremented once per frame written.
+//   uiTimeStamp          - passed to the decoder as uiInBsTimeStamp.
+//   iWidth / iHeight     - updated to the size of the last frame written.
+//   iLastWidth / iLastHeight - updated together whenever a size record is written. Both are
+//                          references so the caller's copies stay in step (iLastHeight used to
+//                          be passed by value, which silently dropped its update).
+void FlushFrames (ISVCDecoder* pDecoder, int64_t& iTotal, FILE* pYuvFile, FILE* pOptionFile, int32_t& iFrameCount,
+                  unsigned long long& uiTimeStamp, int32_t& iWidth, int32_t& iHeight, int32_t& iLastWidth,
+                  int32_t& iLastHeight) {
+  uint8_t* pData[3] = { NULL };
+  uint8_t* pDst[3] = { NULL };
+  SBufferInfo sDstBufInfo;
+  int32_t num_of_frames_in_buffer = 0;
+  CUtils cOutputModule;
+  pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
+  for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
+    int64_t iStart = WelsTime();
+    pData[0] = NULL;
+    pData[1] = NULL;
+    pData[2] = NULL;
+    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
+    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
+    pDecoder->FlushFrame (pData, &sDstBufInfo);
+    if (sDstBufInfo.iBufferStatus == 1) {
+      // Plane pointers are taken from the buffer info rather than from pData.
+      pDst[0] = sDstBufInfo.pDst[0];
+      pDst[1] = sDstBufInfo.pDst[1];
+      pDst[2] = sDstBufInfo.pDst[2];
+    }
+    int64_t iEnd = WelsTime();
+    iTotal += iEnd - iStart; // file output below is deliberately excluded from the timing
+    if (sDstBufInfo.iBufferStatus == 1) {
+      cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
+      iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
+      iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
+      if (pOptionFile != NULL) {
+        // NOTE(review): a record is written only when BOTH dimensions changed; confirm "&&" (not "||") is intended.
+        if (iWidth != iLastWidth && iHeight != iLastHeight) {
+          fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
+          fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
+          fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
+          iLastWidth = iWidth;
+          iLastHeight = iHeight;
+        }
+      }
+      ++iFrameCount;
+    }
+  }
+}
void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, const char* kpOuputFileName,
int32_t& iWidth, int32_t& iHeight, const char* pOptionFileName, const char* pLengthFileName,
int32_t iErrorConMethod,
@@ -95,14 +193,17 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
int32_t iBufPos = 0;
int32_t iFileSize;
- int32_t i = 0;
int32_t iLastWidth = 0, iLastHeight = 0;
int32_t iFrameCount = 0;
int32_t iEndOfStreamFlag = 0;
- int32_t num_of_frames_in_buffer = 0;
pDecoder->SetOption (DECODER_OPTION_ERROR_CON_IDC, &iErrorConMethod);
CUtils cOutputModule;
double dElapsed = 0;
+ uint8_t uLastSpsBuf[32];
+ int32_t iLastSpsByteCount = 0;
+
+ int32_t iThreadCount = 1;
+ pDecoder->GetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
if (kpH264FileName) {
pH264File = fopen (kpH264FileName, "rb");
@@ -148,7 +249,7 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
fseek (pH264File, 0L, SEEK_END);
iFileSize = (int32_t) ftell (pH264File);
- if (iFileSize <= 0) {
+ if (iFileSize <= 4) {
fprintf (stderr, "Current Bit Stream File is too small, read error!!!!\n");
goto label_exit;
}
@@ -181,13 +282,32 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
goto label_exit;
iSliceSize = static_cast<int32_t> (pInfo[2]);
} else {
- for (i = 0; i < iFileSize; i++) {
- if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
- && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
- break;
+ if (iThreadCount >= 1) {
+ uint8_t* uSpsPtr = NULL;
+ int32_t iSpsByteCount = 0;
+ iSliceSize = readPicture (pBuf, iFileSize, iBufPos, uSpsPtr, iSpsByteCount);
+ if (iLastSpsByteCount > 0 && iSpsByteCount > 0) {
+ if (iSpsByteCount != iLastSpsByteCount || memcmp (uSpsPtr, uLastSpsBuf, iLastSpsByteCount) != 0) {
+ //whenever new sequence is different from preceding sequence. All pending frames must be flushed out before the new sequence can start to decode.
+ FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
+ iLastHeight);
+ }
+ }
+ if (iSpsByteCount > 0 && uSpsPtr != NULL) {
+ if (iSpsByteCount > 32) iSpsByteCount = 32;
+ iLastSpsByteCount = iSpsByteCount;
+ memcpy (uLastSpsBuf, uSpsPtr, iSpsByteCount);
+ }
+ } else {
+ int i = 0;
+ for (i = 0; i < iFileSize; i++) {
+ if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
+ && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
+ break;
+ }
}
+ iSliceSize = i;
}
- iSliceSize = i;
}
if (iSliceSize < 4) { //too small size, no effective data, ignore
iBufPos += iSliceSize;
@@ -225,9 +345,9 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
}
if (sDstBufInfo.iBufferStatus == 1) {
- pDst[0] = pData[0];
- pDst[1] = pData[1];
- pDst[2] = pData[2];
+ pDst[0] = sDstBufInfo.pDst[0];
+ pDst[1] = sDstBufInfo.pDst[1];
+ pDst[2] = sDstBufInfo.pDst[2];
}
iEnd = WelsTime();
iTotal += iEnd - iStart;
@@ -257,9 +377,9 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
pDecoder->DecodeFrame2 (NULL, 0, pData, &sDstBufInfo);
if (sDstBufInfo.iBufferStatus == 1) {
- pDst[0] = pData[0];
- pDst[1] = pData[1];
- pDst[2] = pData[2];
+ pDst[0] = sDstBufInfo.pDst[0];
+ pDst[1] = sDstBufInfo.pDst[1];
+ pDst[2] = sDstBufInfo.pDst[2];
}
iEnd = WelsTime();
iTotal += iEnd - iStart;
@@ -283,41 +403,8 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
iBufPos += iSliceSize;
++ iSliceIndex;
}
-
- pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
- for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
- iStart = WelsTime();
- pData[0] = NULL;
- pData[1] = NULL;
- pData[2] = NULL;
- memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
- sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
- sDstBufInfo.iBufferStatus = 1;
- pDecoder->FlushFrame (pData, &sDstBufInfo);
- if (sDstBufInfo.iBufferStatus == 1) {
- pDst[0] = pData[0];
- pDst[1] = pData[1];
- pDst[2] = pData[2];
- }
- iEnd = WelsTime();
- iTotal += iEnd - iStart;
- if (sDstBufInfo.iBufferStatus == 1) {
- cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
- iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
- iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
-
- if (pOptionFile != NULL) {
- if (iWidth != iLastWidth && iHeight != iLastHeight) {
- fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
- fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
- fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
- iLastWidth = iWidth;
- iLastHeight = iHeight;
- }
- }
- ++iFrameCount;
- }
- }
+ FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
+ iLastHeight);
dElapsed = iTotal / 1e6;
fprintf (stderr, "-------------------------------------------------------\n");
fprintf (stderr, "iWidth:\t\t%d\nheight:\t\t%d\nFrames:\t\t%d\ndecode time:\t%f sec\nFPS:\t\t%f fps\n",
@@ -489,6 +576,9 @@ int32_t main (int32_t iArgC, char* pArgV[]) {
pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting);
}
+ int32_t iThreadCount = 0;
+ pDecoder->SetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
+
if (pDecoder->Initialize (&sDecParam)) {
printf ("Decoder initialization failed.\n");
return 1;
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h
index 3fb3a2c7dd4..e84114a83cd 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h
@@ -67,6 +67,26 @@ void DeblockingInit (PDeblockingFunc pDeblockingFunc, int32_t iCpu);
void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb);
/*!
+* \brief AVC slice init deblocking filtering target layer
+*
+* \in and out param SDeblockingFilter
+* \in and out param iFilterIdc
+*
+* \return NONE
+*/
+void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc);
+
+/*!
+* \brief AVC MB deblocking filtering target layer
+*
+* \param DqLayer which has the current location of MB to be deblocked.
+*
+* \return NONE
+*/
+void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc,
+ PDeblockingFilterMbFunc pDeblockMb);
+
+/*!
* \brief pixel deblocking filtering
*
* \param filter deblocking filter
@@ -77,16 +97,21 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun
* \return NONE
*/
-uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy);
+uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
+ int32_t iNeighMb, int32_t iMbXy);
uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy);
int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc);
-void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag);
-void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4], int32_t iBoundryFlag);
-
void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag);
+// Returns the pNzc entry for macroblock index iMbXy. The entry comes from
+// pCurDqLayer->pDec->pNzc when both pDec and its pNzc are non-NULL, otherwise from
+// the layer's own pNzc.
+inline int8_t* GetPNzc (PDqLayer pCurDqLayer, int32_t iMbXy) {
+  if (pCurDqLayer->pDec == NULL || pCurDqLayer->pDec->pNzc == NULL) {
+    return pCurDqLayer->pNzc[iMbXy];
+  }
+  return pCurDqLayer->pDec->pNzc[iMbXy];
+}
+
} // namespace WelsDec
#endif //WELS_DEBLOCKING_H__
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h
index 23d27591f66..f7197abbf13 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h
@@ -42,6 +42,10 @@ int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx);
int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+
+int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx);
+int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+
typedef int32_t (*PWelsDecMbFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
@@ -54,17 +58,18 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx); //construction based on slice
int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur);
+int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx);
int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx);
-int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput);
-int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
+int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput);
+int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer,
uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC);
-int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer);
+int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer);
void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx);
-int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer);
+int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer);
void WelsChromaDcIdct (int16_t* pBlock);
-bool ComputeColocated (PWelsDecoderContext pCtx);
+bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx);
#ifdef __cplusplus
extern "C" {
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h
index 3cc514faa2d..0c84739d51e 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h
@@ -59,6 +59,21 @@ int32_t DecoderConfigParam (PWelsDecoderContext pCtx, const SDecodingParam* kpPa
*/
void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx);
+/*
+* fill last decoded picture info
+*/
+void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo);
+
+/*!
+* \brief fill data fields in SPS and PPS default for decoder context
+*/
+void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx);
+
+/*!
+* \brief copy SpsPps from one Ctx to another ctx for threaded code
+*/
+void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx);
+
/*!
*************************************************************************************
* \brief Initialize Wels decoder parameters and memory
@@ -157,7 +172,11 @@ void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx);
//update decoder statistics information
void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput);
//Destroy picutre buffer
-void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa);
+void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa);
+//reset picture reordering buffer list
+void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo,
+ const bool& bFullReset);
+
#ifdef __cplusplus
}
#endif//__cplusplus
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h
index 0b9401de13c..2ad2aeda2f8 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h
@@ -56,6 +56,7 @@
#include "expand_pic.h"
#include "mc.h"
#include "memory_align.h"
+#include "wels_decoder_thread.h"
namespace WelsDec {
#define MAX_PRED_MODE_ID_I16x16 3
@@ -64,6 +65,7 @@ namespace WelsDec {
#define WELS_QP_MAX 51
#define LONG_TERM_REF
+// -(2^31 - 1); parenthesized so the macro always expands as a single operand.
+#define IMinInt32 (-0x7FFFFFFF)
typedef struct SWels_Cabac_Element {
uint8_t uiState;
uint8_t uiMPS;
@@ -172,6 +174,7 @@ typedef struct tagDeblockingFilter {
int8_t iChromaQP[2];
int8_t iLumaQP;
struct TagDeblockingFunc* pLoopf;
+ PPicture* pRefPics[LIST_A];
} SDeblockingFilter, *PDeblockingFilter;
typedef void (*PDeblockingFilterMbFunc) (PDqLayer pCurDqLayer, PDeblockingFilter filter, int32_t boundry_flag);
@@ -214,7 +217,7 @@ typedef struct TagBlockFunc {
} SBlockFunc;
typedef void (*PWelsFillNeighborMbInfoIntra4x4Func) (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
- int8_t* pIntraPredMode, PDqLayer pCurLayer);
+ int8_t* pIntraPredMode, PDqLayer pCurDqLayer);
typedef void (*PWelsMapNeighToSample) (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail);
typedef void (*PWelsMap16NeighToSample) (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail);
typedef int32_t (*PWelsParseIntra4x4ModeFunc) (PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
@@ -228,6 +231,72 @@ enum {
OVERWRITE_SUBSETSPS = 1 << 2
};
+
+//Decoder SPS and PPS global CTX
+// Groups the SPS / PPS / subset-SPS storage and its bookkeeping in a single struct.
+// SWelsDecoderContext embeds one instance as sSpsPpsCtx.
+typedef struct tagWelsWelsDecoderSpsPpsCTX {
+ SPosOffset sFrameCrop;
+
+ // Parameter-set storage; each array has one slot more than the corresponding MAX_*_COUNT.
+ SSps sSpsBuffer[MAX_SPS_COUNT + 1];
+ SPps sPpsBuffer[MAX_PPS_COUNT + 1];
+
+ SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1];
+ SNalUnit sPrefixNal;
+
+ // One SPS pointer per layer; ResetActiveSPSForEachLayer() sets them all to NULL.
+ PSps pActiveLayerSps[MAX_LAYER_NUM];
+ bool bAvcBasedFlag; // For decoding bitstream:
+
+ // for EC parameter sets
+ bool bSpsExistAheadFlag; // whether an SPS NAL exists ahead of the sequence
+ bool bSubspsExistAheadFlag;// whether a Subset SPS NAL exists ahead of the sequence
+ bool bPpsExistAheadFlag; // whether a PPS NAL exists ahead of the sequence
+
+ int32_t iSpsErrorIgnored;
+ int32_t iSubSpsErrorIgnored;
+ int32_t iPpsErrorIgnored;
+
+ // NOTE(review): presumably indexed by parameter-set id - confirm against the parser.
+ bool bSpsAvailFlags[MAX_SPS_COUNT];
+ bool bSubspsAvailFlags[MAX_SPS_COUNT];
+ bool bPpsAvailFlags[MAX_PPS_COUNT];
+ int32_t iPPSLastInvalidId;
+ int32_t iPPSInvalidNum;
+ int32_t iSPSLastInvalidId;
+ int32_t iSPSInvalidNum;
+ int32_t iSubSPSLastInvalidId;
+ int32_t iSubSPSInvalidNum;
+ int32_t iSeqId; //sequence id
+ int iOverwriteFlags;
+} SWelsDecoderSpsPpsCTX, *PWelsDecoderSpsPpsCTX;
+
+//Last Decoded Picture Info
+// State describing the most recently decoded picture. SWelsDecoderContext refers to an
+// instance through its pLastDecPicInfo pointer.
+typedef struct tagSWelsLastDecPicInfo {
+ // Save the last nal header info
+ SNalUnitHeaderExt sLastNalHdrExt;
+ SSliceHeader sLastSliceHeader;
+ int32_t iPrevPicOrderCntMsb;
+ int32_t iPrevPicOrderCntLsb;
+ PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
+ int32_t iPrevFrameNum;// frame number of previous frame well decoded for non-truncated mode yet
+ bool bLastHasMmco5;
+ uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
+} SWelsLastDecPicInfo, *PWelsLastDecPicInfo;
+
+// One entry of the picture list that SWelsDecoderContext refers to through pPictInfoList.
+// NOTE(review): field meanings below are inferred from their names - confirm against the reordering code.
+typedef struct tagPictInfo {
+ SBufferInfo sBufferInfo; // output buffer description stored with this entry
+ int32_t iPOC; // presumably the picture order count of the entry
+ int32_t iPicBuffIdx; // presumably an index into the picture buffer
+ uint32_t uiDecodingTimeStamp;
+ bool bLastGOP;
+} SPictInfo, *PPictInfo;
+
+// Bookkeeping for picture reordering; SWelsDecoderContext refers to an instance through
+// pPictReoderingStatus, and ResetReorderingPictureBuffers() takes one together with a PPictInfo list.
+// NOTE(review): individual field semantics are not visible here - confirm before relying on them.
+typedef struct tagPictReoderingStatus {
+ int32_t iPictInfoIndex;
+ int32_t iMinPOC;
+ int32_t iNumOfPicts;
+ int32_t iLastGOPRemainPicts;
+ int32_t iLastWrittenPOC;
+ int32_t iLargestBufferedPicIndex;
+} SPictReoderingStatus, *PPictReoderingStatus;
+
/*
* SWelsDecoderContext: to maintail all modules data over decoder@framework
*/
@@ -262,9 +331,6 @@ typedef struct TagWelsDecoderContext {
EWelsSliceType eSliceType; // Slice type
bool bUsedAsRef; //flag as ref
int32_t iFrameNum;
- int32_t
- iPrevFrameNum; // frame number of previous frame well decoded for non-truncated mode yet
- bool bLastHasMmco5; //
int32_t iErrorCode; // error code return while decoding in case packets lost
SFmo sFmoList[MAX_PPS_COUNT]; // list for FMO storage
PFmo pFmo; // current fmo context after parsed slice_header
@@ -304,6 +370,7 @@ typedef struct TagWelsDecoderContext {
uint32_t iMbHeight;
} sMb;
+
// reconstruction picture
PPicture pDec; //pointer to current picture being reconstructed
@@ -312,65 +379,44 @@ typedef struct TagWelsDecoderContext {
// reference pictures
SRefPic sRefPic;
-
- SVlcTable sVlcTable; // vlc table
+ SRefPic sTmpRefPic; //used to temporarily save RefPic for next active thread
+ SVlcTable* pVlcTable; // vlc table
SBitStringAux sBs;
int32_t iMaxBsBufferSizeInByte; //actual memory size for BS buffer
/* Global memory external */
+ SWelsDecoderSpsPpsCTX sSpsPpsCtx;
+ bool bHasNewSps;
SPosOffset sFrameCrop;
- SSps sSpsBuffer[MAX_SPS_COUNT + 1];
- SPps sPpsBuffer[MAX_PPS_COUNT + 1];
PSliceHeader pSliceHeader;
PPicBuff pPicBuff; // Initially allocated memory for pictures which are used in decoding.
int32_t iPicQueueNumber;
- SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1];
- SNalUnit sPrefixNal;
-
PAccessUnit pAccessUnitList; // current access unit list to be performed
- PSps pActiveLayerSps[MAX_LAYER_NUM];
+ //PSps pActiveLayerSps[MAX_LAYER_NUM];
PSps pSps; // used by current AU
PPps pPps; // used by current AU
// Memory for pAccessUnitList is dynamically held till decoder destruction.
PDqLayer
pCurDqLayer; // current DQ layer representation, also carry reference base layer if applicable
PDqLayer pDqLayersList[LAYER_NUM_EXCHANGEABLE]; // DQ layers list with memory allocated
-
+ PNalUnit pNalCur; // point to current NAL Nnit
+ uint8_t uiNalRefIdc; // NalRefIdc for easy access;
int32_t iPicWidthReq; // picture width have requested the memory
int32_t iPicHeightReq; // picture height have requested the memory
uint8_t uiTargetDqId; // maximal DQ ID in current access unit, meaning target layer ID
- bool bAvcBasedFlag; // For decoding bitstream:
+ //bool bAvcBasedFlag; // For decoding bitstream:
bool bEndOfStreamFlag; // Flag on end of stream requested by external application layer
bool bInstantDecFlag; // Flag for no-delay decoding
bool bInitialDqLayersMem; // dq layers related memory is available?
bool bOnlyOneLayerInCurAuFlag; //only one layer in current AU: 1
-// for EC parameter sets
- bool bSpsExistAheadFlag; // whether does SPS NAL exist ahead of sequence?
- bool bSubspsExistAheadFlag;// whether does Subset SPS NAL exist ahead of sequence?
- bool bPpsExistAheadFlag; // whether does PPS NAL exist ahead of sequence?
-
- int32_t iSpsErrorIgnored;
- int32_t iSubSpsErrorIgnored;
- int32_t iPpsErrorIgnored;
-
- bool bSpsAvailFlags[MAX_SPS_COUNT];
- bool bSubspsAvailFlags[MAX_SPS_COUNT];
- bool bPpsAvailFlags[MAX_PPS_COUNT];
- int32_t iPPSLastInvalidId;
- int32_t iPPSInvalidNum;
- int32_t iSPSLastInvalidId;
- int32_t iSPSInvalidNum;
- int32_t iSubSPSLastInvalidId;
- int32_t iSubSPSInvalidNum;
-
bool bReferenceLostAtT0Flag;
int32_t iTotalNumMbRec; //record current number of decoded MB
#ifdef LONG_TERM_REF
@@ -384,7 +430,6 @@ typedef struct TagWelsDecoderContext {
#endif
bool bNewSeqBegin;
bool bNextNewSeqBegin;
- int iOverwriteFlags;
//for Parse only
bool bFramePending;
@@ -396,7 +441,7 @@ typedef struct TagWelsDecoderContext {
SPpsBsInfo sPpsBsInfo [MAX_PPS_COUNT];
SParserBsInfo* pParserBsInfo;
- PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
+ //PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
PGetIntraPredFunc pGetI16x16LumaPredFunc[7]; //h264_predict_copy_16x16;
PGetIntraPredFunc pGetI4x4LumaPredFunc[14]; // h264_predict_4x4_t
PGetIntraPredFunc pGetIChromaPredFunc[7]; // h264_predict_8x8_t
@@ -436,18 +481,14 @@ typedef struct TagWelsDecoderContext {
//trace handle
void* pTraceHandle;
-//Save the last nal header info
- SNalUnitHeaderExt sLastNalHdrExt;
- SSliceHeader sLastSliceHeader;
- int32_t iPrevPicOrderCntMsb;
- int32_t iPrevPicOrderCntLsb;
+ PWelsLastDecPicInfo pLastDecPicInfo;
SWelsCabacCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT];
bool bCabacInited;
SWelsCabacCtx pCabacCtx[WELS_CONTEXT_COUNT];
PWelsCabacDecEngine pCabacDecEngine;
double dDecTime;
- SDecoderStatistics sDecoderStatistics;// For real time debugging
+ SDecoderStatistics* pDecoderStatistics; // For real time debugging
int32_t iMbEcedNum;
int32_t iMbEcedPropNum;
int32_t iMbNum;
@@ -456,6 +497,7 @@ typedef struct TagWelsDecoderContext {
int32_t iECMVs[16][2];
PPicture pECRefPic[16];
unsigned long long uiTimeStamp;
+ uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
// To support scaling list HP
uint16_t pDequant_coeff_buffer4x4[6][52][16];
uint16_t pDequant_coeff_buffer8x8[6][52][64];
@@ -465,15 +507,56 @@ typedef struct TagWelsDecoderContext {
bool bDequantCoeff4x4Init;
bool bUseScalingList;
CMemoryAlign* pMemAlign;
+ void* pThreadCtx;
+ void* pLastThreadCtx;
+ WELS_MUTEX* pCsDecoder;
+ int16_t lastReadyHeightOffset[LIST_A][MAX_REF_PIC_COUNT]; //last ready reference MB offset
+ PPictInfo pPictInfoList;
+ PPictReoderingStatus pPictReoderingStatus;
} SWelsDecoderContext, *PWelsDecoderContext;
+typedef struct tagSWelsDecThread { // per-thread bookkeeping: semaphores, thread handle, command and sizing fields
+ SWelsDecSemphore* sIsBusy; // held by pointer, unlike the two by-value semaphores below; presumably shared - TODO confirm
+ SWelsDecSemphore sIsActivated; // semaphore owned by this struct
+ SWelsDecSemphore sIsIdle; // semaphore owned by this struct
+ SWelsDecThread sThrHandle; // wrapper around a WELS_THREAD_HANDLE
+ uint32_t uiCommand; // presumably one of the WELS_DEC_THREAD_COMMAND_* values - TODO confirm
+ uint32_t uiThrNum;
+ uint32_t uiThrMaxNum; // value reported by GetThreadCount()
+ uint32_t uiThrStackSize;
+ DECLARE_PROCTHREAD_PTR (pThrProcMain); // expands to: LPWELS_THREAD_ROUTINE pThrProcMain
+} SWelsDecThreadInfo, *PWelsDecThreadInfo;
+
+typedef struct tagSWelsDecThreadCtx { // per-thread decoding context: thread info plus the decoder context it works on
+ SWelsDecThreadInfo sThreadInfo; // embedded by value; GetThreadCount() reads sThreadInfo.uiThrMaxNum
+ PWelsDecoderContext pCtx; // decoder context associated with this thread context
+ void* threadCtxOwner; // untyped owner pointer; concrete type is not visible in this header
+ uint8_t* kpSrc; // presumably the input bitstream buffer - TODO confirm against caller
+ int32_t kiSrcLen; // presumably the length of kpSrc in bytes - TODO confirm
+ uint8_t** ppDst; // presumably the output plane pointers - TODO confirm
+ SBufferInfo sDstInfo; // buffer info stored by value
+ PPicture pDec; // picture handle; presumably the picture being decoded - TODO confirm
+ SWelsDecEvent sImageReady; // event objects stored by value
+ SWelsDecEvent sSliceDecodeStart;
+ SWelsDecEvent sSliceDecodeFinish;
+ int32_t iPicBuffIdx; //picBuff Index
+} SWelsDecoderThreadCTX, *PWelsDecoderThreadCTX;
+
static inline void ResetActiveSPSForEachLayer (PWelsDecoderContext pCtx) {
if (pCtx->iTotalNumMbRec == 0) {
for (int i = 0; i < MAX_LAYER_NUM; i++) {
- pCtx->pActiveLayerSps[i] = NULL;
+ pCtx->sSpsPpsCtx.pActiveLayerSps[i] = NULL;
}
}
}
+static inline int32_t GetThreadCount (PWelsDecoderContext pCtx) { // returns uiThrMaxNum of the attached thread context, or 0 when none is attached; pCtx itself is dereferenced unchecked
+ int32_t iThreadCount = 0; // result when pCtx->pThreadCtx is NULL
+ if (pCtx->pThreadCtx != NULL) {
+ PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx; // the context stores this pointer as void*
+ iThreadCount = pThreadCtx->sThreadInfo.uiThrMaxNum; // uint32_t value converted to int32_t
+ }
+ return iThreadCount;
+}
//#ifdef __cplusplus
//}
//#endif//__cplusplus
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h
index 41cd1d6a98a..df253399671 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h
@@ -129,6 +129,34 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
*/
bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kpDst, PNalUnit const kpSrc);
+/*
+* WelsDecodeInitAccessUnitStart
+* check and (re)allocate picture buffers on new sequence begin
+* parameters:
+* pCtx: decoder context
+* pDstInfo: buffer info
+* return:
+* 0 - success; otherwise returned error_no defined in error_no.h
+*/
+int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo);
+/*
+* AllocPicBuffOnNewSeqBegin
+* check and (re)allocate picture buffers on new sequence begin
+* return:
+* 0 - success; otherwise returned error_no defined in error_no.h
+*/
+int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx);
+
+/*
+* InitConstructAccessUnit
+* Initialize state before constructing an access unit from the given input bitstream (possibly a partial NAL unit); one or
+* more NAL units are joined to form a complete access unit.
+* parameters:
+* pDstInfo: buffer info (SBufferInfo)
+* return:
+* 0 - success; otherwise returned error_no defined in error_no.h
+*/
+int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo);
/*
* ConstructAccessUnit
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h
index 7be76f291b1..164ae15d6c7 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h
@@ -48,11 +48,12 @@
namespace WelsDec {
void WelsResetRefPic (PWelsDecoderContext pCtx);
+void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx);
int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc);
int32_t WelsInitBSliceRefList (PWelsDecoderContext pCtx, int32_t iPoc);
int32_t WelsReorderRefList (PWelsDecoderContext pCtx);
int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx);
-int32_t WelsMarkAsRef (PWelsDecoderContext pCtx);
+int32_t WelsMarkAsRef (PWelsDecoderContext pCtx, PPicture pLastDec = NULL);
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h
index daf640155c9..bdbc7cd20b8 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h
@@ -91,7 +91,7 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
* \param
* \param output iMvp[]
*/
-void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]);
+void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]);
/*!
* \brief get the motion predictor and reference for B-slice direct mode version 2
@@ -111,7 +111,8 @@ int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& sub
/*!
* \brief get the motion predictor for B-slice temporal direct mode 16x16
*/
-int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]);
+int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A],
+ SubMbType& subMbType);
/*!
* \brief get the motion params for B-slice spatial direct mode
@@ -143,6 +144,48 @@ void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[
void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]);
+/*!
+* \brief Fill the spatial direct motion vectors for 8x8 direct MB
+* \param
+* \param output motion vector cache and motion vector deviation cache
+*/
+void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
+ const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A],
+ int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]);
+
+/*!
+* \brief Fill the temporal direct motion vectors for 8x8 direct MB
+* \param
+* \param output motion vector cache and motion vector deviation cache
+*/
+void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount,
+ const int8_t& iPartW,
+ const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2],
+ int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]);
+
+/*!
+* \brief returns ref_index in List_0 from the colocated ref_index in LIST_0.
+* \param
+* returns ref_index in List_0 of ref picture LIST_0
+*/
+int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0,
+ const int32_t& ref0Count); //ISO/IEC 14496-10:2009(E) (8-193)
+
+/*!
+* \brief update ref_index cache for current MB, for 8x8
+* \param
+* \param
+*/
+void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef);
+
+inline uint32_t* GetMbType (PDqLayer& pCurDqLayer) { // selects which pMbType array to use for the layer; pCurDqLayer is dereferenced unchecked
+ if (pCurDqLayer->pDec != NULL) { // a picture is attached to the layer
+ return pCurDqLayer->pDec->pMbType; // use the picture's own pMbType array
+ } else {
+ return pCurDqLayer->pMbType; // otherwise fall back to the layer's pMbType array
+ }
+}
+
} // namespace WelsDec
#endif//WELS_MV_PRED_H__
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h
index 1aca12002c9..8f31543ae40 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h
@@ -82,6 +82,8 @@ void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30
const int8_t iListIdx);
void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx);
void UpdateP16x16DirectCabac (PDqLayer pCurDqLayer);
+void UpdateP8x8RefCacheIdxCabac (int8_t pRefIndex[LIST_A][30], const int16_t& iPartIdx, const int32_t& listIdx,
+ const int8_t& iRef);
}
//#pragma pack()
#endif
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h
index ab09bbe0c18..d048551640f 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h
@@ -51,18 +51,18 @@ namespace WelsDec {
-void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer);
-void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer);
+void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer);
+void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurDqLayer);
void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
- PDqLayer pCurLayer);
+ PDqLayer pCurDqLayer);
void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
- PDqLayer pCurLayer);
+ PDqLayer pCurDqLayer);
void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
- PDqLayer pCurLayer);
-void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer);
+ PDqLayer pCurDqLayer);
+void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer);
void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
- int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
+ int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer);
/*!
* \brief check iPredMode for intra16x16 eligible or not
@@ -131,6 +131,7 @@ int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable,
*/
int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
PBitStringAux pBs);
-
+int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A],
+ int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs);
} // namespace WelsDec
#endif//WELS_PARSE_MB_SYN_CAVLC_H__
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h
index 7e7e17ff9b7..473f80ff119 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h
@@ -53,6 +53,9 @@ typedef struct TagPicBuff {
*/
PPicture PrefetchPic (PPicBuff pPicBuff); // To get current node applicable
+PPicture PrefetchPicForThread (PPicBuff pPicBuff); // To get current node applicable in the case of threaded mode
+PPicture PrefetchLastPicForThread (PPicBuff pPicBuff,
+ const int32_t& iLast); // To get last node applicable in the case of threaded mode
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h
index 46bdd3b360f..bdacc364cf6 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h
@@ -37,6 +37,7 @@
#include "typedefs.h"
#include "wels_common_defs.h"
#include "wels_const_common.h"
+#include "wels_decoder_thread.h"
using namespace WelsCommon;
@@ -68,8 +69,7 @@ struct SPicture {
/*******************************sef_definition for misc use****************************/
bool bUsedAsRef; //for ref pic management
bool bIsLongRef; // long term reference frame flag //for ref pic management
- uint8_t uiRefCount;
- bool bAvailableFlag; // indicate whether it is available in this picture memory block.
+ int8_t iRefCount;
bool bIsComplete; // indicate whether current picture is complete, not from EC
/*******************************for future use****************************/
@@ -85,15 +85,22 @@ struct SPicture {
int32_t iSpsId; //against mosaic caused by cross-IDR interval reference.
int32_t iPpsId;
unsigned long long uiTimeStamp;
+ uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
+ int32_t iPicBuffIdx;
+ EWelsSliceType eSliceType;
+ bool bIsUngroupedMultiSlice; //multi-slice picture in which each slice group contains one slice.
bool bNewSeqBegin;
int32_t iMbEcedNum;
int32_t iMbEcedPropNum;
int32_t iMbNum;
+ bool* pMbCorrectlyDecodedFlag;
+ int8_t (*pNzc)[24];
uint32_t* pMbType; // mb type used for direct mode
int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; // used for direct mode
int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; //used for direct mode
struct SPicture* pRefPic[LIST_A][17]; //ref pictures used for direct mode
+ SWelsDecEvent* pReadyEvent; //MB line ready event
};// "Picture" declaration is comflict with Mac system
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h
index 8d7a9ea1333..be0c4a7407a 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h
@@ -74,10 +74,11 @@ typedef struct TagMCRefMember {
int32_t iPicHeight;
} sMCRefMember;
-void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
+void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx,
+ int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]);
-void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer);
+void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer);
int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
@@ -93,7 +94,7 @@ int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLe
int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
-void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx);
+int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx);
int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx);
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h
index 75ad646b4de..9295eb71b14 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h
@@ -122,7 +122,7 @@ typedef struct TagSliceHeaders {
PPps pPps;
int32_t iSpsId;
int32_t iPpsId;
- bool bIdrFlag;
+ bool bIdrFlag;
/*********************got from other layer for efficency if possible*********************/
SRefPicListReorderSyn pRefPicListReordering; // Reference picture list reordering syntaxs
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h
index 833bc8bca3e..19aae110e33 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h
@@ -275,7 +275,7 @@ static const SPartMbInfo g_ksInterBMbTypeInfo[] = {
{ MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, 4 } //B_8x8
};
-//Table 7.17 – Sub-macroblock types in B macroblocks.
+//Table 7.17 Sub-macroblock types in B macroblocks.
static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = {
{SUB_MB_TYPE_8x8, 1, 2},
{SUB_MB_TYPE_8x4, 2, 2},
@@ -283,7 +283,7 @@ static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = {
{SUB_MB_TYPE_4x4, 4, 1},
};
-//Table 7.18 – Sub-macroblock types in B macroblocks.
+//Table 7.18 Sub-macroblock types in B macroblocks.
static const SPartMbInfo g_ksInterBSubMbTypeInfo[] = {
{ MB_TYPE_DIRECT, 1, 2 }, //B_Direct_8x8
{ SUB_MB_TYPE_8x8 | MB_TYPE_P0L0, 1, 2 }, //B_L0_8x8
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h
new file mode 100644
index 00000000000..ebb8015c9ca
--- /dev/null
+++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h
@@ -0,0 +1,170 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2019, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file wels_decoder_thread.h
+ *
+ * \brief Interfaces introduced in thread programming
+ *
+ * \date 08/06/2018 Created
+ *
+ *************************************************************************************
+ */
+
+#ifndef _WELS_DECODER_THREAD_H_
+#define _WELS_DECODER_THREAD_H_
+
+#include "WelsThreadLib.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WELS_DEC_MAX_NUM_CPU 16 // presumably an upper bound on CPUs/threads considered by the decoder - TODO confirm against users
+#define WELS_DEC_MAX_THREAD_STACK_SIZE 4096 // unit is not stated here (bytes?) - TODO confirm
+#define WELS_DEC_THREAD_COMMAND_RUN 0 // command code; presumably stored in SWelsDecThreadInfo::uiCommand - TODO confirm
+#define WELS_DEC_THREAD_COMMAND_ABORT 1 // command code paired with WELS_DEC_THREAD_COMMAND_RUN
+
+#if defined(_WIN32) || defined(__CYGWIN__) // Windows/Cygwin: every primitive wraps a WELS_THREAD_HANDLE
+typedef struct tagWelsDecSemphore {
+ WELS_THREAD_HANDLE h; // handle of the underlying semaphore object
+} SWelsDecSemphore;
+
+typedef struct tagWelsDecEvent {
+ WELS_THREAD_HANDLE h; // handle of the underlying event object
+ int isSignaled; // signaled state tracked alongside the handle
+} SWelsDecEvent;
+
+typedef struct tagWelsDecThread {
+ WELS_THREAD_HANDLE h; // handle of the underlying thread
+} SWelsDecThread;
+
+#define WelsDecThreadFunc(fn,a) DWORD WINAPI fn(LPVOID a) // declares a thread entry point named fn taking argument a
+#define WelsDecThreadFuncArg(a) LPWELS_THREAD_ROUTINE a // declares a parameter a that holds a thread entry point
+#define WELS_DEC_THREAD_WAIT_TIMEDOUT WAIT_TIMEOUT // wait-result codes map onto the Win32 wait constants
+#define WELS_DEC_THREAD_WAIT_SIGNALED WAIT_OBJECT_0
+#define WELS_DEC_THREAD_WAIT_INFINITE INFINITE
+
+#else // NON-WINDOWS
+
+typedef pthread_mutexattr_t WELS_MUTEX_ATTR;
+
+typedef struct tagWelsDecSemphore {
+ long max; // presumably the maximum count (cf. the max argument of SemCreate) - TODO confirm
+ long v; // presumably the current count - TODO confirm
+ WELS_EVENT e;
+ WELS_MUTEX m;
+} SWelsDecSemphore;
+
+typedef struct tagWelsDecEvent {
+ int manualReset; // cf. the manualReset argument of EventCreate
+ int isSignaled; // signaled state of the event
+ pthread_cond_t c; // condition variable; presumably guarded by m - TODO confirm in the implementation
+ WELS_MUTEX m;
+} SWelsDecEvent;
+
+typedef struct tagWelsDecThread {
+ WELS_THREAD_HANDLE h; // handle of the underlying thread
+} SWelsDecThread;
+
+#define WelsDecThreadFunc(fn,a) void* fn(void* a) // declares a thread entry point named fn taking argument a
+#define WelsDecThreadFuncArg(a) void* (*a)(void*) // declares a parameter a that holds a thread entry point
+
+#define WELS_DEC_THREAD_WAIT_TIMEDOUT ETIMEDOUT // wait-result codes map onto errno constants on this branch
+#define WELS_DEC_THREAD_WAIT_SIGNALED EINTR // NOTE(review): EINTR is used as the "signaled" result code - confirm this is intended
+#define WELS_DEC_THREAD_WAIT_INFINITE -1 // NOTE(review): negative literal is not parenthesized
+
+#endif//_WIN32
+
+#define WelsDecThreadReturn WELS_THREAD_ROUTINE_RETURN(0); // NOTE(review): the expansion already ends with ';'
+
+int32_t GetCPUCount(); // presumably the number of CPUs available - TODO confirm in the implementation
+
+// Event primitives operating on SWelsDecEvent
+int EventCreate (SWelsDecEvent* e, int manualReset, int initialState);
+void EventPost (SWelsDecEvent* e);
+int EventWait (SWelsDecEvent* e, int32_t timeout); // presumably returns a WELS_DEC_THREAD_WAIT_* code; timeout unit not stated here - TODO confirm
+void EventReset (SWelsDecEvent* e);
+void EventDestroy (SWelsDecEvent* e);
+
+// Semaphore primitives operating on SWelsDecSemphore
+int SemCreate (SWelsDecSemphore* s, long value, long max);
+int SemWait (SWelsDecSemphore* s, int32_t timeout); // presumably returns a WELS_DEC_THREAD_WAIT_* code - TODO confirm
+void SemRelease (SWelsDecSemphore* s, long* prev_count); // RELEASE_SEMAPHORE passes NULL for prev_count
+void SemDestroy (SWelsDecSemphore* s);
+
+// Thread primitives operating on SWelsDecThread
+int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta); // tf is the thread entry point, ta its argument
+int ThreadWait (SWelsDecThread* t); // presumably waits for the thread to finish - TODO confirm
+
+#define DECLARE_PROCTHREAD(name, argument) \
+ WelsDecThreadFunc(name,argument) // declares a thread entry point via the platform-specific WelsDecThreadFunc
+
+#define DECLARE_PROCTHREAD_PTR(name) \
+ LPWELS_THREAD_ROUTINE name // declares a variable or field holding a thread entry point
+
+#define CREATE_THREAD(ph, threadproc,argument) \
+ ThreadCreate(ph, threadproc, (void*)argument) // argument is cast to void* before being passed on
+
+#define CREATE_EVENT(ph, manualreset,initial_state,name) \
+ EventCreate(ph,(int)(manualreset),(int)(initial_state)) // the name parameter is accepted but not used
+
+#define CREATE_SEMAPHORE(ph, initial_count,max_count, name) \
+ SemCreate(ph, (long)initial_count,(long)(max_count)) // the name parameter is accepted but not used
+
+#define CLOSE_EVENT(ph) \
+ EventDestroy(ph) // forwards to EventDestroy
+
+#define CLOSE_SEMAPHORE(ph) \
+ SemDestroy(ph) // forwards to SemDestroy
+
+#define SET_EVENT(ph) \
+ EventPost(ph) // forwards to EventPost
+
+#define RESET_EVENT(ph) \
+ EventReset(ph) // forwards to EventReset
+
+#define RELEASE_SEMAPHORE(ph) \
+ SemRelease(ph,NULL) // the previous count is not requested
+
+#define WAIT_EVENT(ph,timeout) \
+ EventWait(ph, (int32_t)timeout) // timeout is cast to int32_t
+
+#define WAIT_THREAD(ph) \
+ ThreadWait(ph) // forwards to ThreadWait
+
+#define WAIT_SEMAPHORE(ph,timeout) \
+ SemWait(ph,(int32_t)timeout) // timeout is cast to int32_t
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp
index 9f7091e4781..91f89b4374b 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp
@@ -148,48 +148,50 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade
++ (*pConsumedBytes);
if (! (IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_SPS_NAL (pNalUnitHeader->eNalUnitType)
- || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->bSpsExistAheadFlag)) {
- if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iSpsErrorIgnored == 0) {
+ || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bSpsExistAheadFlag)) {
+ if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSpsErrorIgnored == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING,
"parse_nal(), no exist Sequence Parameter Sets ahead of sequence when try to decode NAL(type:%d).",
pNalUnitHeader->eNalUnitType);
} else {
- pCtx->iSpsErrorIgnored++;
+ pCtx->sSpsPpsCtx.iSpsErrorIgnored++;
}
- pCtx->sDecoderStatistics.iSpsNoExistNalNum++;
+ pCtx->pDecoderStatistics->iSpsNoExistNalNum++;
pCtx->iErrorCode = dsNoParamSets;
return NULL;
}
- pCtx->iSpsErrorIgnored = 0;
+ pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0;
if (! (IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_PARAM_SETS_NALS (pNalUnitHeader->eNalUnitType)
- || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->bPpsExistAheadFlag)) {
- if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iPpsErrorIgnored == 0) {
+ || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) {
+ if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iPpsErrorIgnored == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING,
"parse_nal(), no exist Picture Parameter Sets ahead of sequence when try to decode NAL(type:%d).",
pNalUnitHeader->eNalUnitType);
} else {
- pCtx->iPpsErrorIgnored++;
+ pCtx->sSpsPpsCtx.iPpsErrorIgnored++;
}
- pCtx->sDecoderStatistics.iPpsNoExistNalNum++;
+ pCtx->pDecoderStatistics->iPpsNoExistNalNum++;
pCtx->iErrorCode = dsNoParamSets;
return NULL;
}
- pCtx->iPpsErrorIgnored = 0;
- if ((IS_VCL_NAL_AVC_BASE (pNalUnitHeader->eNalUnitType) && ! (pCtx->bSpsExistAheadFlag || pCtx->bPpsExistAheadFlag)) ||
- (IS_NEW_INTRODUCED_SVC_NAL (pNalUnitHeader->eNalUnitType) && ! (pCtx->bSpsExistAheadFlag || pCtx->bSubspsExistAheadFlag
- || pCtx->bPpsExistAheadFlag))) {
- if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iSubSpsErrorIgnored == 0) {
+ pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0;
+ if ((IS_VCL_NAL_AVC_BASE (pNalUnitHeader->eNalUnitType) && ! (pCtx->sSpsPpsCtx.bSpsExistAheadFlag
+ || pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) ||
+ (IS_NEW_INTRODUCED_SVC_NAL (pNalUnitHeader->eNalUnitType) && ! (pCtx->sSpsPpsCtx.bSpsExistAheadFlag
+ || pCtx->sSpsPpsCtx.bSubspsExistAheadFlag
+ || pCtx->sSpsPpsCtx.bPpsExistAheadFlag))) {
+ if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSubSpsErrorIgnored == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING,
"ParseNalHeader(), no exist Parameter Sets ahead of sequence when try to decode slice(type:%d).",
pNalUnitHeader->eNalUnitType);
} else {
- pCtx->iSubSpsErrorIgnored++;
+ pCtx->sSpsPpsCtx.iSubSpsErrorIgnored++;
}
- pCtx->sDecoderStatistics.iSubSpsNoExistNalNum++;
+ pCtx->pDecoderStatistics->iSubSpsNoExistNalNum++;
pCtx->iErrorCode |= dsNoParamSets;
return NULL;
}
- pCtx->iSubSpsErrorIgnored = 0;
+ pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0;
switch (pNalUnitHeader->eNalUnitType) {
case NAL_UNIT_AU_DELIMITER:
@@ -201,7 +203,7 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade
break;
case NAL_UNIT_PREFIX:
- pCurNal = &pCtx->sPrefixNal;
+ pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal;
pCurNal->uiTimeStamp = pCtx->uiTimeStamp;
if (iNalSize < NAL_UNIT_HEADER_EXT_SIZE) {
@@ -365,9 +367,9 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade
memcpy (pSavedData->pCurPos + iStartDeltaByte, pSrcNal, iActualLen);
pSavedData->pCurPos += iStartDeltaByte + iActualLen;
}
- if (NAL_UNIT_PREFIX == pCtx->sPrefixNal.sNalHeaderExt.sNalUnitHeader.eNalUnitType) {
- if (pCtx->sPrefixNal.sNalData.sPrefixNal.bPrefixNalCorrectFlag) {
- PrefetchNalHeaderExtSyntax (pCtx, pCurNal, &pCtx->sPrefixNal);
+ if (NAL_UNIT_PREFIX == pCtx->sSpsPpsCtx.sPrefixNal.sNalHeaderExt.sNalUnitHeader.eNalUnitType) {
+ if (pCtx->sSpsPpsCtx.sPrefixNal.sNalData.sPrefixNal.bPrefixNalCorrectFlag) {
+ PrefetchNalHeaderExtSyntax (pCtx, pCurNal, &pCtx->sSpsPpsCtx.sPrefixNal);
}
}
@@ -496,8 +498,8 @@ bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal,
const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt;
const SSliceHeader* kpLastSliceHeader = &kpLastNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
const SSliceHeader* kpCurSliceHeader = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
- if (pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
- && pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) {
+ if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
+ && pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) {
return true; // the active sps changed, new sequence begins, so the current au is ready
}
@@ -548,8 +550,8 @@ bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal,
bool CheckNextAuNewSeq (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PSps kpSps) {
const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt;
- if (pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
- && pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps)
+ if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
+ && pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps)
return true;
if (kpCurNalHeaderExt->bIdrFlag)
return true;
@@ -606,7 +608,7 @@ int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t
pCtx->iErrorCode |= dsBitstreamError;
return iErr;
}
-
+ pCtx->bHasNewSps = true;
break;
case NAL_UNIT_PPS:
@@ -620,17 +622,18 @@ int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t
return iErr;
}
}
- iErr = ParsePps (pCtx, &pCtx->sPpsBuffer[0], pBs, pSrcNal, kSrcNalLen);
+ iErr = ParsePps (pCtx, &pCtx->sSpsPpsCtx.sPpsBuffer[0], pBs, pSrcNal, kSrcNalLen);
if (ERR_NONE != iErr) { // modified for pps invalid, 12/1/2009
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE)
pCtx->iErrorCode |= dsNoParamSets;
else
pCtx->iErrorCode |= dsBitstreamError;
+ pCtx->bHasNewSps = false;
return iErr;
}
- pCtx->bPpsExistAheadFlag = true;
-
+ pCtx->sSpsPpsCtx.bPpsExistAheadFlag = true;
+ ++ (pCtx->sSpsPpsCtx.iSeqId);
break;
case NAL_UNIT_SEI:
@@ -683,7 +686,7 @@ int32_t ParseRefBasePicMarking (PBitStringAux pBs, PRefBasePicMarking pRefBasePi
}
int32_t ParsePrefixNalUnit (PWelsDecoderContext pCtx, PBitStringAux pBs) {
- PNalUnit pCurNal = &pCtx->sPrefixNal;
+ PNalUnit pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal;
uint32_t uiCode;
if (pCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
@@ -834,12 +837,12 @@ const SLevelLimits* GetLevelLimits (int32_t iLevelIdx, bool bConstraint3) {
bool CheckSpsActive (PWelsDecoderContext pCtx, PSps pSps, bool bUseSubsetFlag) {
for (int i = 0; i < MAX_LAYER_NUM; i++) {
- if (pCtx->pActiveLayerSps[i] == pSps)
+ if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == pSps)
return true;
}
// Pre-active, will be used soon
if (bUseSubsetFlag) {
- if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->bSubspsAvailFlags[pSps->iSpsId]) {
+ if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->sSpsPpsCtx.bSubspsAvailFlags[pSps->iSpsId]) {
if (pCtx->iTotalNumMbRec > 0) {
return true;
}
@@ -857,7 +860,7 @@ bool CheckSpsActive (PWelsDecoderContext pCtx, PSps pSps, bool bUseSubsetFlag) {
}
}
} else {
- if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->bSpsAvailFlags[pSps->iSpsId]) {
+ if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->sSpsPpsCtx.bSpsAvailFlags[pSps->iSpsId]) {
if (pCtx->iTotalNumMbRec > 0) {
return true;
}
@@ -1251,57 +1254,57 @@ int32_t ParseSps (PWelsDecoderContext pCtx, PBitStringAux pBsAux, int32_t* pPicW
if (PRO_SCALABLE_BASELINE == uiProfileIdc || PRO_SCALABLE_HIGH == uiProfileIdc)
- pCtx->bAvcBasedFlag = false;
+ pCtx->sSpsPpsCtx.bAvcBasedFlag = false;
*pPicWidth = pSps->iMbWidth << 4;
*pPicHeight = pSps->iMbHeight << 4;
PSps pTmpSps = NULL;
if (kbUseSubsetFlag) {
- pTmpSps = &pCtx->sSubsetSpsBuffer[iSpsId].sSps;
+ pTmpSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId].sSps;
} else {
- pTmpSps = &pCtx->sSpsBuffer[iSpsId];
+ pTmpSps = &pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId];
}
if (CheckSpsActive (pCtx, pTmpSps, kbUseSubsetFlag)) {
// we are overwriting the active sps, copy a temp buffer
if (kbUseSubsetFlag) {
- if (memcmp (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)) != 0) {
+ if (memcmp (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)) != 0) {
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
- memcpy (&pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
+ memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
pCtx->bAuReadyFlag = true;
pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1;
- pCtx->iOverwriteFlags |= OVERWRITE_SUBSETSPS;
+ pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS;
} else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSubsetSps->sSps.iSpsId)) {
- memcpy (&pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
- pCtx->iOverwriteFlags |= OVERWRITE_SUBSETSPS;
+ memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
+ pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS;
} else {
- memcpy (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
+ memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
}
}
} else {
- if (memcmp (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps)) != 0) {
+ if (memcmp (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)) != 0) {
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
- memcpy (&pCtx->sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
- pCtx->iOverwriteFlags |= OVERWRITE_SPS;
+ memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
+ pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS;
pCtx->bAuReadyFlag = true;
pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1;
} else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSps->iSpsId)) {
- memcpy (&pCtx->sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
- pCtx->iOverwriteFlags |= OVERWRITE_SPS;
+ memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
+ pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS;
} else {
- memcpy (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps));
+ memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps));
}
}
}
}
// Not overwrite active sps, just copy to final place
else if (kbUseSubsetFlag) {
- memcpy (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
- pCtx->bSubspsAvailFlags[iSpsId] = true;
- pCtx->bSubspsExistAheadFlag = true;
+ memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
+ pCtx->sSpsPpsCtx.bSubspsAvailFlags[iSpsId] = true;
+ pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = true;
} else {
- memcpy (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps));
- pCtx->bSpsAvailFlags[iSpsId] = true;
- pCtx->bSpsExistAheadFlag = true;
+ memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps));
+ pCtx->sSpsPpsCtx.bSpsAvailFlags[iSpsId] = true;
+ pCtx->sSpsPpsCtx.bSpsExistAheadFlag = true;
}
return ERR_NONE;
}
@@ -1421,8 +1424,8 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux,
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_scaling_matrix_present_flag
pPps->bPicScalingMatrixPresentFlag = !!uiCode;
if (pPps->bPicScalingMatrixPresentFlag) {
- if (pCtx->bSpsAvailFlags[pPps->iSpsId]) {
- WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bTransform8x8ModeFlag,
+ if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId]) {
+ WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bTransform8x8ModeFlag,
pPps->bPicScalingListPresentFlag, pPps->iScalingList4x4, pPps->iScalingList8x8));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
@@ -1440,16 +1443,16 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux,
if (pCtx->pPps != NULL && pCtx->pPps->iPpsId == pPps->iPpsId) {
if (memcmp (pCtx->pPps, pPps, sizeof (*pPps)) != 0) {
- memcpy (&pCtx->sPpsBuffer[MAX_PPS_COUNT], pPps, sizeof (SPps));
- pCtx->iOverwriteFlags |= OVERWRITE_PPS;
+ memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], pPps, sizeof (SPps));
+ pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_PPS;
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
pCtx->bAuReadyFlag = true;
pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1;
}
}
} else {
- memcpy (&pCtx->sPpsBuffer[uiPpsId], pPps, sizeof (SPps));
- pCtx->bPpsAvailFlags[uiPpsId] = true;
+ memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[uiPpsId], pPps, sizeof (SPps));
+ pCtx->sSpsPpsCtx.bPpsAvailFlags[uiPpsId] = true;
}
if (pCtx->pParam->bParseOnly) {
if (kSrcNalLen >= SPS_PPS_BS_SIZE - 4) { //pps bs exceeds
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp
index 40ad5efade9..1817ab5b44b 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp
@@ -55,17 +55,17 @@ namespace WelsDec {
#define g_kiBetaTable(x) g_kiBetaTable[(x)+12]
#define g_kiTc0Table(x) g_kiTc0Table[(x)+12]
-#define MB_BS_MV(iRefIndex, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \
+#define MB_BS_MV(pRefPic0, pRefPic1, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
- ( iRefIndex[iMbXy][iIndex] - iRefIndex[iMbBn][iNeighIndex] )||\
+ ( pRefPic0 != pRefPic1) ||\
( WELS_ABS( iMotionVector[iMbXy][iIndex][0] - iMotionVector[iMbBn][iNeighIndex][0] ) >= 4 ) ||\
( WELS_ABS( iMotionVector[iMbXy][iIndex][1] - iMotionVector[iMbBn][iNeighIndex][1] ) >= 4 )\
)
#if defined(SAME_MB_DIFF_REFIDX)
-#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \
+#define SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex) \
(\
- ( iRefIndex[iIndex] - iRefIndex[iNeighIndex] )||(\
+ ( pRefPics[iIndex] != pRefPics[iNeighIndex] )||(\
( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |\
( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
)
@@ -76,8 +76,8 @@ namespace WelsDec {
)
#endif
-#define BS_EDGE(bsx1, iRefIndex, iMotionVector, iIndex, iNeighIndex) \
-( (bsx1|SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1)))
+#define BS_EDGE(bsx1, pRefPics, iMotionVector, iIndex, iNeighIndex) \
+( (bsx1|SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1)))
#define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \
{\
@@ -148,13 +148,13 @@ static const uint8_t g_kuiTableB8x8Idx[2][16] = {
8, 9, 12, 13, 10, 11, 14, 15
},
};
-
+//fix Bugzilla 1486223
#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \
{\
- tc[0] = g_kiTc0Table(iIndexA)[pBS[0]] + bChroma;\
- tc[1] = g_kiTc0Table(iIndexA)[pBS[1]] + bChroma;\
- tc[2] = g_kiTc0Table(iIndexA)[pBS[2]] + bChroma;\
- tc[3] = g_kiTc0Table(iIndexA)[pBS[3]] + bChroma;\
+ tc[0] = g_kiTc0Table(iIndexA)[pBS[0] & 3] + bChroma;\
+ tc[1] = g_kiTc0Table(iIndexA)[pBS[1] & 3] + bChroma;\
+ tc[2] = g_kiTc0Table(iIndexA)[pBS[2] & 3] + bChroma;\
+ tc[3] = g_kiTc0Table(iIndexA)[pBS[3] & 3] + bChroma;\
}
void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
@@ -201,14 +201,25 @@ void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4
nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor;
}
-void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
+void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4],
+ int8_t* pNnzTab,
int32_t iMbXy) {
uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
- int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
+ int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy];
+ void* iRefs[MB_BLOCK4x4_NUM];
+ int i;
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
int8_t i8x8NnzTab[4];
+ /* Look up each reference picture based on indices */
+ for (i = 0; i < MB_BLOCK4x4_NUM; i++) {
+ if (iRefIdx[i] > REF_NOT_IN_LIST)
+ iRefs[i] = pFilter->pRefPics[LIST_0][iRefIdx[i]];
+ else
+ iRefs[i] = NULL;
+ }
+
if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
for (int32_t i = 0; i < 4; i++) {
int32_t iBlkIdx = i << 2;
@@ -216,15 +227,15 @@ void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS
pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
}
//vertical
- nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
- nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
//horizontal
- nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
- nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
} else {
uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
@@ -234,58 +245,70 @@ void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
- nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
- nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
- nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
+ nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 1, 0);
+ nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 2, 1);
+ nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 3, 2);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
- nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
- nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
- nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
+ nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 4);
+ nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 5);
+ nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 6);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
- nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
- nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
- nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
+ nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 8);
+ nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 9);
+ nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 10);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
- nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
- nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
- nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
+ nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 12);
+ nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 13);
+ nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 14);
// horizontal
* (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
- nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
- nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
- nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
- nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
+ nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 4, 0);
+ nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 1);
+ nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 2);
+ nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 3);
* (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
- nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
- nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
- nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
- nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
+ nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 8, 4);
+ nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 5);
+ nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 6);
+ nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 7);
* (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
- nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
- nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
- nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
- nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
+ nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 12, 8);
+ nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 9);
+ nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 10);
+ nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 11);
}
}
-void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
+void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer,
+ uint8_t nBS[2][4][4], int8_t* pNnzTab,
int32_t iMbXy) {
uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
- int8_t* iRefIndex[LIST_A];
- iRefIndex[LIST_0] = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
- iRefIndex[LIST_1] = pCurDqLayer->pRefIndex[LIST_1][iMbXy];
- ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
+ void* iRefs[LIST_A][MB_BLOCK4x4_NUM];
+ ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
int8_t i8x8NnzTab[4];
+ int l;
+
+ for (l = 0; l < LIST_A; l++) {
+ int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[l][iMbXy];
+ int i;
+ /* Look up each reference picture based on indices */
+ for (i = 0; i < MB_BLOCK4x4_NUM; i++) {
+ if (iRefIdx[i] > REF_NOT_IN_LIST)
+ iRefs[l][i] = pFilter->pRefPics[l][iRefIdx[i]];
+ else
+ iRefs[l][i] = NULL;
+ }
+ }
if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
for (int32_t i = 0; i < 4; i++) {
@@ -298,9 +321,9 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
int8_t iNeigborIndex = g_kuiMbCountScan4Idx[0];
nBS[0][2][0] = nBS[0][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
- nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex[listIdx],
- pCurDqLayer->pMv[listIdx][iMbXy],
+ if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
+ nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs[listIdx],
+ pCurDqLayer->pDec->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
@@ -309,9 +332,9 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
iNeigborIndex = g_kuiMbCountScan4Idx[2 << 2];
nBS[0][2][2] = nBS[0][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
- nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex[listIdx],
- pCurDqLayer->pMv[listIdx][iMbXy],
+ if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
+ nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs[listIdx],
+ pCurDqLayer->pDec->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
@@ -322,9 +345,9 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
iNeigborIndex = g_kuiMbCountScan4Idx[0];
nBS[1][2][0] = nBS[1][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
- nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex[listIdx],
- pCurDqLayer->pMv[listIdx][iMbXy],
+ if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
+ nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs[listIdx],
+ pCurDqLayer->pDec->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
@@ -334,9 +357,9 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
iNeigborIndex = g_kuiMbCountScan4Idx[1 << 2];
nBS[1][2][2] = nBS[1][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
- nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex[listIdx],
- pCurDqLayer->pMv[listIdx][iMbXy],
+ if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
+ nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs[listIdx],
+ pCurDqLayer->pDec->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
@@ -351,22 +374,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
nBS[0][1][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][1] > REF_NOT_IN_LIST && iRefIndex[listIdx][0] > REF_NOT_IN_LIST) {
- nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 1, 0);
+ if (iRefs[listIdx][1] && iRefs[listIdx][0]) {
+ nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 1, 0);
break;
}
}
nBS[0][2][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][2] > REF_NOT_IN_LIST && iRefIndex[listIdx][1] > REF_NOT_IN_LIST) {
- nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 2, 1);
+ if (iRefs[listIdx][2] && iRefs[listIdx][1]) {
+ nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 2, 1);
break;
}
}
nBS[0][3][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][3] > REF_NOT_IN_LIST && iRefIndex[listIdx][2] > REF_NOT_IN_LIST) {
- nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 3, 2);
+ if (iRefs[listIdx][3] && iRefs[listIdx][2]) {
+ nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 3, 2);
break;
}
}
@@ -375,22 +398,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
nBS[0][1][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][5] > REF_NOT_IN_LIST && iRefIndex[listIdx][4] > REF_NOT_IN_LIST) {
- nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 4);
+ if (iRefs[listIdx][5] && iRefs[listIdx][4]) {
+ nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 5, 4);
break;
}
}
nBS[0][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][6] > REF_NOT_IN_LIST && iRefIndex[listIdx][5] > REF_NOT_IN_LIST) {
- nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 5);
+ if (iRefs[listIdx][6] && iRefs[listIdx][5]) {
+ nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 6, 5);
break;
}
}
nBS[0][3][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][7] > REF_NOT_IN_LIST && iRefIndex[listIdx][6] > REF_NOT_IN_LIST) {
- nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 6);
+ if (iRefs[listIdx][7] && iRefs[listIdx][6]) {
+ nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 7, 6);
break;
}
}
@@ -399,22 +422,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
nBS[0][1][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][9] > REF_NOT_IN_LIST && iRefIndex[listIdx][8] > REF_NOT_IN_LIST) {
- nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 8);
+ if (iRefs[listIdx][9] && iRefs[listIdx][8]) {
+ nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 9, 8);
break;
}
}
nBS[0][2][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][10] > REF_NOT_IN_LIST && iRefIndex[listIdx][9] > REF_NOT_IN_LIST) {
- nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 9);
+ if (iRefs[listIdx][10] && iRefs[listIdx][9]) {
+ nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 10, 9);
break;
}
}
nBS[0][3][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][11] > REF_NOT_IN_LIST && iRefIndex[listIdx][10] > REF_NOT_IN_LIST) {
- nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 10);
+ if (iRefs[listIdx][11] && iRefs[listIdx][10]) {
+ nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 11, 10);
break;
}
}
@@ -423,22 +446,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
nBS[0][1][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][13] > REF_NOT_IN_LIST && iRefIndex[listIdx][12] > REF_NOT_IN_LIST) {
- nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 12);
+ if (iRefs[listIdx][13] && iRefs[listIdx][12]) {
+ nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 13, 12);
break;
}
}
nBS[0][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][14] > REF_NOT_IN_LIST && iRefIndex[listIdx][13] > REF_NOT_IN_LIST) {
- nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 13);
+ if (iRefs[listIdx][14] && iRefs[listIdx][13]) {
+ nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 14, 13);
break;
}
}
nBS[0][3][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][15] > REF_NOT_IN_LIST && iRefIndex[listIdx][14] > REF_NOT_IN_LIST) {
- nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 14);
+ if (iRefs[listIdx][15] && iRefs[listIdx][14]) {
+ nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 15, 14);
break;
}
}
@@ -447,29 +470,29 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
* (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
nBS[1][1][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][4] > REF_NOT_IN_LIST && iRefIndex[listIdx][0] > REF_NOT_IN_LIST) {
- nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 4, 0);
+ if (iRefs[listIdx][4] && iRefs[listIdx][0]) {
+ nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 4, 0);
break;
}
}
nBS[1][1][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][5] > REF_NOT_IN_LIST && iRefIndex[listIdx][1] > REF_NOT_IN_LIST) {
- nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 1);
+ if (iRefs[listIdx][5] && iRefs[listIdx][1]) {
+ nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 5, 1);
break;
}
}
nBS[1][1][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][6] > REF_NOT_IN_LIST && iRefIndex[listIdx][2] > REF_NOT_IN_LIST) {
- nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 2);
+ if (iRefs[listIdx][6] && iRefs[listIdx][2]) {
+ nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 6, 2);
break;
}
}
nBS[1][1][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][7] > REF_NOT_IN_LIST && iRefIndex[listIdx][3] > REF_NOT_IN_LIST) {
- nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 3);
+ if (iRefs[listIdx][7] && iRefs[listIdx][3]) {
+ nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 7, 3);
break;
}
}
@@ -477,29 +500,29 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
* (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
nBS[1][2][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][8] > REF_NOT_IN_LIST && iRefIndex[listIdx][4] > REF_NOT_IN_LIST) {
- nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 8, 4);
+ if (iRefs[listIdx][8] && iRefs[listIdx][4]) {
+ nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 8, 4);
break;
}
}
nBS[1][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][9] > REF_NOT_IN_LIST && iRefIndex[listIdx][5] > REF_NOT_IN_LIST) {
- nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 5);
+ if (iRefs[listIdx][9] && iRefs[listIdx][5]) {
+ nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 9, 5);
break;
}
}
nBS[1][2][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][10] > REF_NOT_IN_LIST && iRefIndex[listIdx][6] > REF_NOT_IN_LIST) {
- nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 6);
+ if (iRefs[listIdx][10] && iRefs[listIdx][6]) {
+ nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 10, 6);
break;
}
}
nBS[1][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][11] > REF_NOT_IN_LIST && iRefIndex[listIdx][7] > REF_NOT_IN_LIST) {
- nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 7);
+ if (iRefs[listIdx][11] && iRefs[listIdx][7]) {
+ nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 11, 7);
break;
}
}
@@ -507,29 +530,29 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
* (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
nBS[1][3][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][12] > REF_NOT_IN_LIST && iRefIndex[listIdx][8] > REF_NOT_IN_LIST) {
- nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 12, 8);
+ if (iRefs[listIdx][12] && iRefs[listIdx][8]) {
+ nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 12, 8);
break;
}
}
nBS[1][3][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][13] > REF_NOT_IN_LIST && iRefIndex[listIdx][9] > REF_NOT_IN_LIST) {
- nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 9);
+ if (iRefs[listIdx][13] && iRefs[listIdx][9]) {
+ nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 13, 9);
break;
}
}
nBS[1][3][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][14] > REF_NOT_IN_LIST && iRefIndex[listIdx][10] > REF_NOT_IN_LIST) {
- nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 10);
+ if (iRefs[listIdx][14] && iRefs[listIdx][10]) {
+ nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 14, 10);
break;
}
}
nBS[1][3][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (iRefIndex[listIdx][15] > REF_NOT_IN_LIST && iRefIndex[listIdx][11] > REF_NOT_IN_LIST) {
- nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 11);
+ if (iRefs[listIdx][15] && iRefs[listIdx][11]) {
+ nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 15, 11);
break;
}
}
@@ -537,7 +560,8 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8
}
-uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
+uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
+ int32_t iNeighMb, int32_t iMbXy) {
int32_t i, j;
uint32_t uiBSx4;
uint8_t* pBS = (uint8_t*) (&uiBSx4);
@@ -545,17 +569,23 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int
const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
+ int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pRefIndex[LIST_0] :
+ pCurDqLayer->pRefIndex[LIST_0];
if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
- uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
+ uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
}
if (uiNzc) {
pBS[i << 1] = pBS[1 + (i << 1)] = 2;
} else {
- pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb,
+ PPicture ref0, ref1;
+ ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
+ ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
+ NULL;
+ pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[LIST_0], iMbXy, iNeighMb,
*pB8x8Idx, *pBn8x8Idx);
}
pB8x8Idx += 4;
@@ -565,13 +595,17 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
- uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
+ uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
- if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+ if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[j + (i << 1)] = 2;
} else {
- pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx,
+ PPicture ref0, ref1;
+ ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
+ ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
+ pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
+ (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pB8x8Idx,
*pBnIdx);
}
pBnIdx++;
@@ -582,13 +616,18 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
- uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
+ uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
- if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
+ if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
pBS[j + (i << 1)] = 2;
} else {
- pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
+ PPicture ref0, ref1;
+ ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
+ ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
+ NULL;
+ pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
+ (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pBIdx,
*pBn8x8Idx);
}
pBIdx++;
@@ -598,11 +637,14 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int
} else {
// only 4x4 transform
for (i = 0; i < 4; i++) {
- if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+ if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[i] = 2;
} else {
- pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
- *pBnIdx);
+ PPicture ref0, ref1;
+ ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
+ ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
+ pBS[i] = MB_BS_MV (ref0, ref1, (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]),
+ iMbXy, iNeighMb, *pBIdx, *pBnIdx);
}
pBIdx++;
pBnIdx++;
@@ -611,7 +653,8 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int
return uiBSx4;
}
-uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
+uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
+ int32_t iNeighMb, int32_t iMbXy) {
int32_t i, j;
uint32_t uiBSx4;
uint8_t* pBS = (uint8_t*) (&uiBSx4);
@@ -619,21 +662,25 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg
const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
+ PPicture ref0, ref1;
if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
- uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
+ uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
}
if (uiNzc) {
pBS[i << 1] = pBS[1 + (i << 1)] = 2;
} else {
pBS[i << 1] = pBS[1 + (i << 1)] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
- && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) {
- pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb,
+ if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
+ && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) {
+ int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx];
+ ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pB8x8Idx]];
+ ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBn8x8Idx]];
+ pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb,
*pB8x8Idx, *pBn8x8Idx);
break;
}
@@ -646,17 +693,20 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
- uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
+ uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
- if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+ if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[j + (i << 1)] = 2;
} else {
pBS[j + (i << 1)] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
- && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) {
- pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pB8x8Idx,
+ if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
+ && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) {
+ int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx];
+ ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pB8x8Idx]];
+ ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBnIdx]];
+ pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb, *pB8x8Idx,
*pBnIdx);
break;
}
@@ -670,18 +720,20 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
- uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
+ uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
- if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
+ if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
pBS[j + (i << 1)] = 2;
} else {
pBS[j + (i << 1)] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
- && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) {
- pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx,
- *pBn8x8Idx);
+ if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
+ && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) {
+ int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx];
+ ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pBIdx]];
+ ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBn8x8Idx]];
+ pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx);
break;
}
}
@@ -693,14 +745,17 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg
} else {
// only 4x4 transform
for (i = 0; i < 4; i++) {
- if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+ if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[i] = 2;
} else {
pBS[i] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
- && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) {
- pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBnIdx);
+ if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
+ && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) {
+ int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx];
+ ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pBIdx]];
+ ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBnIdx]];
+ pBS[i] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBnIdx);
break;
}
}
@@ -926,8 +981,8 @@ void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uin
}
-void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4],
- int32_t iBoundryFlag) {
+static void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4],
+ int32_t iBoundryFlag) {
int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
int32_t iMbX = pCurDqLayer->iMbX;
int32_t iMbY = pCurDqLayer->iMbY;
@@ -943,7 +998,7 @@ void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_
pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSizeUV + iMbX) << 3);
pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSizeUV + iMbX) << 3);
-//Vertical margrin
+ //Vertical margin
if (iBoundryFlag & LEFT_FLAG_MASK) {
int32_t iLeftXyIndex = iMbXyIndex - 1;
pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iLeftXyIndex] + 1) >> 1;
@@ -1170,7 +1225,7 @@ void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, in
}
// Intra-MB deblocking entry: runs the luma HV filter, then the chroma HV filter, with the same boundary flag (merge h&v lookup table operation to save performance)
-void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
+static void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
FilteringEdgeLumaHV (pCurDqLayer, pFilter, iBoundryFlag);
FilteringEdgeChromaHV (pCurDqLayer, pFilter, iBoundryFlag);
}
@@ -1179,7 +1234,8 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
uint8_t nBS[2][4][4] = {{{ 0 }}};
int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
- uint32_t iCurMbType = pCurDqLayer->pMbType[iMbXyIndex];
+ uint32_t iCurMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbXyIndex] :
+ pCurDqLayer->pMbType[iMbXyIndex];
int32_t iMbNb;
PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
@@ -1197,24 +1253,28 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
if (iBoundryFlag & LEFT_FLAG_MASK) {
iMbNb = iMbXyIndex - 1;
+ uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
if (bBSlice) {
- * (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase (
- pCurDqLayer, 0, iMbNb, iMbXyIndex);
+ * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 :
+ DeblockingBSliceBsMarginalMBAvcbase (
+ pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
} else {
- * (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
- pCurDqLayer, 0, iMbNb, iMbXyIndex);
+ * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
+ pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
}
} else {
* (uint32_t*)nBS[0][0] = 0;
}
if (iBoundryFlag & TOP_FLAG_MASK) {
iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth;
+ uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
if (bBSlice) {
- * (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase (
- pCurDqLayer, 1, iMbNb, iMbXyIndex);
+ * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 :
+ DeblockingBSliceBsMarginalMBAvcbase (
+ pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
} else {
- * (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
- pCurDqLayer, 1, iMbNb, iMbXyIndex);
+ * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
+ pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
}
} else {
* (uint32_t*)nBS[1][0] = 0;
@@ -1226,16 +1286,16 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
} else {
if (IS_INTER_16x16 (iCurMbType)) {
if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
- DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+ DeblockingBSInsideMBAvsbase (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
} else {
- DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+ DeblockingBSInsideMBAvsbase8x8 (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
}
} else {
if (bBSlice) {
- DeblockingBSliceBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
+ DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
} else {
- DeblockingBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
+ DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
}
}
}
@@ -1280,6 +1340,8 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun
pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
pFilter.pLoopf = &pCtx->sDeblockingFunc;
+ pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0];
+ pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1];
/* Step2: macroblock deblocking */
if (0 == iFilterIdc || 2 == iFilterIdc) {
@@ -1313,6 +1375,56 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun
} while (1);
}
}
+
+/*!
+* \brief AVC slice init deblocking filtering target layer
+* Zeroes pFilter, then fills it from pCtx (output planes, strides, slice type, alpha/beta offsets, loop-filter functions, ref lists).
+* \in and out param SDeblockingFilter pFilter: fully overwritten, previous contents are discarded
+* \in and out param iFilterIdc: overwritten with the current slice header's uiDisableDeblockingFilterIdc
+*
+* \return NONE
+*/
+void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc) {
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
+
+ memset (&pFilter, 0, sizeof (pFilter)); // pFilter is a reference, so sizeof covers the whole struct, not a pointer
+
+ iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc; // same header object that pSliceHeaderExt points at
+
+ /* Step1: parameters set */
+ pFilter.pCsData[0] = pCtx->pDec->pData[0]; // plane pointers come from the context's current picture (pCtx->pDec)
+ pFilter.pCsData[1] = pCtx->pDec->pData[1];
+ pFilter.pCsData[2] = pCtx->pDec->pData[2];
+
+ pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0];
+ pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1]; // only entries 0 and 1 are assigned; any further entry keeps the 0 from memset
+
+ pFilter.eSliceType = (EWelsSliceType)pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType;
+
+ pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
+ pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
+
+ pFilter.pLoopf = &pCtx->sDeblockingFunc; // stores the address of the context's function table, no copy is made
+ pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0]; // reference lists are taken from pCtx->sRefPic as they are at call time
+ pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1];
+}
+
+/*!
+* \brief AVC MB deblocking filtering target layer
+* Calls pDeblockMb for the current MB only when iFilterIdc is 0 or 2; any other value makes this a no-op.
+* \param DqLayer which has the current location of MB to be deblocked.
+* pFilter / iFilterIdc are presumably the values prepared by WelsDeblockingInitFilter -- TODO confirm against callers.
+* \return NONE
+*/
+void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc,
+ PDeblockingFilterMbFunc pDeblockMb) {
+ /* macroblock deblocking */
+ if (0 == iFilterIdc || 2 == iFilterIdc) {
+ int32_t iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc); // flag is computed from the layer and the filter idc, then handed to the callback
+ pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag); // callback receives the filter by pointer
+ }
+}
/*!
* \brief deblocking module initialize
*
@@ -1391,6 +1503,19 @@ void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) {
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
}
#endif//HAVE_MMI
+
+#if defined(HAVE_MSA)
+ if (iCpu & WELS_CPU_MSA) {
+ pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa;
+ pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa;
+ pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa;
+ pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa;
+ pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
+ pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
+ pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
+ pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
+ }
+#endif//HAVE_MSA
}
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp
index 14935587275..d06a7d77f12 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp
@@ -54,45 +54,61 @@ namespace WelsDec {
extern void FreePicture (PPicture pPic, CMemoryAlign* pMa);
-static inline int32_t iAbs (int32_t x) {
- static const int32_t INT_BITS = (sizeof (int) * CHAR_BIT) - 1;
- int32_t y = x >> INT_BITS;
- return (x ^ y) - y;
-}
-
extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight);
+static bool CheckRefPics (const PWelsDecoderContext& pCtx) { // returns true only if no counted short-term or long-term reference slot is NULL
+ int32_t listCount = 1; // one list is scanned unless the slice is a B slice
+ if (pCtx->eSliceType == B_SLICE) {
+ ++listCount; // B slice: scan a second list as well
+ }
+ for (int32_t list = LIST_0; list < listCount; ++list) { // NOTE(review): listCount bounds the loop correctly only if LIST_0 == 0 -- confirm
+ int32_t shortRefCount = pCtx->sRefPic.uiShortRefCount[list];
+ for (int32_t refIdx = 0; refIdx < shortRefCount; ++refIdx) {
+ if (!pCtx->sRefPic.pShortRefList[list][refIdx]) {
+ return false; // a short-term entry within the advertised count is missing
+ }
+ }
+ int32_t longRefCount = pCtx->sRefPic.uiLongRefCount[list];
+ for (int32_t refIdx = 0; refIdx < longRefCount; ++refIdx) {
+ if (!pCtx->sRefPic.pLongRefList[list][refIdx]) {
+ return false; // a long-term entry within the advertised count is missing
+ }
+ }
+ }
+ return true; // also reached when every scanned count is zero
+}
+
int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount;
- int32_t iCurLayerWidth = pCurLayer->iMbWidth << 4;
- int32_t iCurLayerHeight = pCurLayer->iMbHeight << 4;
+ int32_t iCurLayerWidth = pCurDqLayer->iMbWidth << 4;
+ int32_t iCurLayerHeight = pCurDqLayer->iMbHeight << 4;
int32_t iNextMbXyIndex = 0;
PFmo pFmo = pCtx->pFmo;
int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice;
int32_t iCountNumMb = 0;
- PDeblockingFilterMbFunc pDeblockMb;
+ PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb;
- if (!pCtx->bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) {
+ if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) {
return ERR_INFO_WIDTH_MISMATCH;
}
iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
- pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
- pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
- pCurLayer->iMbXyIndex = iNextMbXyIndex;
+ pCurDqLayer->iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
+ pCurDqLayer->iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth;
+ pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
if (0 == iNextMbXyIndex) {
- pCurLayer->pDec->iSpsId = pCtx->pSps->iSpsId;
- pCurLayer->pDec->iPpsId = pCtx->pPps->iPpsId;
+ pCurDqLayer->pDec->iSpsId = pCtx->pSps->iSpsId;
+ pCurDqLayer->pDec->iPpsId = pCtx->pPps->iPpsId;
- pCurLayer->pDec->uiQualityId = pCurLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
+ pCurDqLayer->pDec->uiQualityId = pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
}
do {
@@ -104,16 +120,16 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
if (WelsTargetMbConstruction (pCtx)) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
"WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
- pCurLayer->iMbX, pCurLayer->iMbY, pCurSlice->eSliceType);
+ pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurSlice->eSliceType);
return ERR_INFO_MB_RECON_FAIL;
}
}
++iCountNumMb;
- if (!pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
- pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
- pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
+ if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
+ pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
+ pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
++pCtx->iTotalNumMbRec;
}
@@ -133,9 +149,9 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame
break;
}
- pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
- pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
- pCurLayer->iMbXyIndex = iNextMbXyIndex;
+ pCurDqLayer->iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
+ pCurDqLayer->iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth;
+ pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
} while (1);
pCtx->pDec->iWidthInPixel = iCurLayerWidth;
@@ -147,8 +163,6 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on
return ERR_NONE;
- pDeblockMb = WelsDeblockingMb;
-
if (1 == pSliceHeader->uiDisableDeblockingFilterIdc
|| pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) {
return ERR_NONE;//NO_SUPPORTED_FILTER_IDX
@@ -160,32 +174,32 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
return ERR_NONE;
}
-int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
+int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer,
uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) {
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t i, iIndex, iOffset;
- if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
for (i = 0; i < 4; i++) {
iIndex = g_kuiMbCountScan4Idx[i << 2];
- if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4]
- || pCurLayer->pNzc[iMbXy][iIndex + 5]) {
+ if (pCurDqLayer->pNzc[iMbXy][iIndex] || pCurDqLayer->pNzc[iMbXy][iIndex + 1] || pCurDqLayer->pNzc[iMbXy][iIndex + 4]
+ || pCurDqLayer->pNzc[iMbXy][iIndex + 5]) {
iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
- pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6));
+ pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 6));
}
}
} else {
// luma.
- const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
- int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
+ const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
+ int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy];
pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc + 0);
pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc + 2);
pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc + 8);
pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10);
}
- const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
- int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
+ const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
+ int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy];
// Cb.
pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16);
// Cr.
@@ -193,29 +207,23 @@ int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLa
return ERR_NONE;
}
-int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
- int32_t iMbX = pCurLayer->iMbX;
- int32_t iMbY = pCurLayer->iMbY;
+int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) {
+ int32_t iMbX = pCurDqLayer->iMbX;
+ int32_t iMbY = pCurDqLayer->iMbY;
uint8_t* pDstY, *pDstCb, *pDstCr;
int32_t iLumaStride = pCtx->pDec->iLinesize[0];
int32_t iChromaStride = pCtx->pDec->iLinesize[1];
- pDstY = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
- pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
- pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+ pDstY = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
+ pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+ pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
if (pCtx->eSliceType == P_SLICE) {
- GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
+ WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx));
} else {
if (pCtx->pTempDec == NULL)
pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
- else {
- if (pCtx->pTempDec->iLinesize[0] != pCtx->pDec->iLinesize[0]) {
- FreePicture (pCtx->pTempDec, pCtx->pMemAlign);
- pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
- }
- }
uint8_t* pTempDstYCbCr[3];
uint8_t* pDstYCbCr[3];
pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
@@ -226,10 +234,12 @@ int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
pDstYCbCr[2] = pDstCr;
WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx));
}
- WelsMbInterSampleConstruction (pCtx, pCurLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride);
+ WelsMbInterSampleConstruction (pCtx, pCurDqLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride);
- pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
- pCurLayer->pNzc[pCurLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
+ if (GetThreadCount (pCtx) <= 1) {
+ pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
+ pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
+ }
return ERR_NONE;
}
@@ -275,36 +285,36 @@ void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pC
#undef STRIDE
}
-int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput) {
+int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput) {
//seems IPCM should not enter this path
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
- WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer);
+ WelsFillRecNeededMbInfo (pCtx, bOutput, pCurDqLayer);
- if (IS_INTRA16x16 (pCurLayer->pMbType[iMbXy])) {
- RecI16x16Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
- } else if (IS_INTRA8x8 (pCurLayer->pMbType[iMbXy])) {
- RecI8x8Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
- } else if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
- RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
+ if (IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])) { // new side reads the MB type through pDec->pMbType instead of the layer's own pMbType
+ RecI16x16Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer);
+ } else if (IS_INTRA8x8 (pCurDqLayer->pDec->pMbType[iMbXy])) {
+ RecI8x8Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer);
+ } else if (IS_INTRA4x4 (pCurDqLayer->pDec->pMbType[iMbXy])) {
+ RecI4x4Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer); // the coefficient buffer is still taken from the layer, not from pDec
} // no else branch: an MB type matching none of the three tests is left untouched here
return ERR_NONE; // unconditional: any value returned by the Rec*Mb calls is not propagated
}
-int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
- int32_t iMbX = pCurLayer->iMbX;
- int32_t iMbY = pCurLayer->iMbY;
+int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) {
+ int32_t iMbX = pCurDqLayer->iMbX;
+ int32_t iMbY = pCurDqLayer->iMbY;
uint8_t* pDstY, *pDstCb, *pDstCr;
int32_t iLumaStride = pCtx->pDec->iLinesize[0];
int32_t iChromaStride = pCtx->pDec->iLinesize[1];
- pDstY = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
- pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
- pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+ pDstY = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
+ pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+ pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
if (pCtx->eSliceType == P_SLICE) {
- GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
+ WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx));
} else {
if (pCtx->pTempDec == NULL)
pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
@@ -316,27 +326,30 @@ int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
pDstYCbCr[0] = pDstY;
pDstYCbCr[1] = pDstCb;
pDstYCbCr[2] = pDstCr;
- GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx);
+ WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx));
}
return ERR_NONE;
}
int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- if (MB_TYPE_INTRA_PCM == pCurLayer->pMbType[pCurLayer->iMbXyIndex]) {
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ if (MB_TYPE_INTRA_PCM == pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]) {
//already decoded and reconstructed when parsing
return ERR_NONE;
- } else if (IS_INTRA (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) {
- WelsMbIntraPredictionConstruction (pCtx, pCurLayer, 1);
- } else if (IS_INTER (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { //InterMB
- if (0 == pCurLayer->pCbp[pCurLayer->iMbXyIndex]) { //uiCbp==0 include SKIP
- WelsMbInterPrediction (pCtx, pCurLayer);
+ } else if (IS_INTRA (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) {
+ WelsMbIntraPredictionConstruction (pCtx, pCurDqLayer, 1);
+ } else if (IS_INTER (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) { //InterMB
+ if (0 == pCurDqLayer->pCbp[pCurDqLayer->iMbXyIndex]) { //uiCbp==0 include SKIP
+ if (!CheckRefPics (pCtx)) {
+ return ERR_INFO_MB_RECON_FAIL;
+ }
+ return WelsMbInterPrediction (pCtx, pCurDqLayer);
} else {
- WelsMbInterConstruction (pCtx, pCurLayer);
+ WelsMbInterConstruction (pCtx, pCurDqLayer);
}
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d",
- pCurLayer->pMbType[pCurLayer->iMbXyIndex]);
+ pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]);
return ERR_INFO_MB_RECON_FAIL;
}
@@ -631,25 +644,25 @@ int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAva
}
int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PBitStringAux pBsAux = pCurLayer->pBitStringAux;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBsAux = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
SWelsNeighAvail sNeighAvail;
int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t i;
uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
- pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
- GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+ GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
if (uiMbType > 25) {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
@@ -667,104 +680,104 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
return ERR_NONE;
} else if (0 == uiMbType) { //I4x4
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
if (pCtx->pPps->bTransform8x8ModeFlag) {
// Transform 8x8 cabac will be added soon
WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
}
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
- uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
- pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
} else {
- pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
}
//get uiCbp for I4x4
WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
- pCurLayer->pCbp[iMbXy] = uiCbp;
+ pCurDqLayer->pCbp[iMbXy] = uiCbp;
pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0;
uiCbpLuma = uiCbp & 15;
} else { //I16x16;
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
- pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
- uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
- uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
- WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
- WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
- }
-
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
- pCurLayer->pCbfDc[iMbXy] = 0;
-
- if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+ pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+ uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ;
+ uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
+ WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurDqLayer));
+ }
+
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
+ pCurDqLayer->pCbfDc[iMbXy] = 0;
+
+ if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) {
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
}
}
- if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+ if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
+ memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
}
- pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
- pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+ pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+ pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
- pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
}
- if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
//step1: Luma DC
WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
- I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+ I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx));
//step2: Luma AC
if (uiCbpLuma) {
for (i = 0; i < 16; i++) {
WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
- pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurDqLayer->pLumaQp[iMbXy], pCtx));
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else { //pNonZeroCount = 0
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
}
} else { //non-MB_TYPE_INTRA16x16
- if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
// Transform 8x8 support for CABAC
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
- pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx));
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
@@ -772,8 +785,8 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
- pCurLayer->pLumaQp[iMbXy], pCtx));
+ g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx));
iIdx++;
}
} else {
@@ -781,10 +794,10 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
}
int32_t iMbResProperty;
@@ -795,7 +808,7 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
for (i = 0; i < 2; i++) {
iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
- iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+ iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
}
}
@@ -807,27 +820,27 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
- pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
iIdx++;
}
}
- ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
- ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
- ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
- ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
} else {
- ST16 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST16 (&pCurLayer->pNzc[iMbXy][20], 0);
- ST16 (&pCurLayer->pNzc[iMbXy][18], 0);
- ST16 (&pCurLayer->pNzc[iMbXy][22], 0);
+ ST16 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST16 (&pCurDqLayer->pNzc[iMbXy][20], 0);
+ ST16 (&pCurDqLayer->pNzc[iMbXy][18], 0);
+ ST16 (&pCurDqLayer->pNzc[iMbXy][22], 0);
}
} else {
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
}
WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
@@ -843,21 +856,21 @@ int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
}
int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PBitStringAux pBsAux = pCurLayer->pBitStringAux;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBsAux = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t iMbResProperty;
int32_t i;
uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
// uiMbType = 4 is not allowded.
@@ -865,10 +878,10 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
int16_t pMotionVector[LIST_A][30][MV_A];
int16_t pMvdCache[LIST_A][30][MV_A];
int8_t pRefIndex[LIST_A][30];
- pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
- WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
+ pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
+ WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer);
WELS_READ_VERIFY (ParseInterPMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
} else { //Intra mode
uiMbType -= 5;
if (uiMbType > 25)
@@ -888,58 +901,58 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
} else { //normal Intra mode
if (0 == uiMbType) { //Intra4x4
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
if (pCtx->pPps->bTransform8x8ModeFlag) {
WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
}
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
- uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
- pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
} else {
- pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
}
} else { //Intra16x16
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
- pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
- uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
- uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
- WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
- WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+ pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+ uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
+ uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
+ WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer));
}
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
- if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
- pCurLayer->pCbp[iMbXy] = uiCbp;
+ pCurDqLayer->pCbp[iMbXy] = uiCbp;
pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
- uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
- uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+ uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ;
+ uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
}
- if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
- if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
// Need modification when B picutre add in
bool bNeedParseTransformSize8x8Flag =
- (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
- || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
- && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
- && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
- && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
+ (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16)
+ || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+ && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0)
&& (pCtx->pPps->bTransform8x8ModeFlag));
if (bNeedParseTransformSize8x8Flag) {
@@ -948,7 +961,7 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
}
}
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+ memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
@@ -956,33 +969,33 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
}
- pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
- pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+ pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+ pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
- if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
//step1: Luma DC
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
- I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+ I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx));
//step2: Luma AC
if (uiCbpLuma) {
for (i = 0; i < 16; i++) {
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
- 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
- pCurLayer->pLumaQp[iMbXy], pCtx));
+ 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx));
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
}
} else { //non-MB_TYPE_INTRA16x16
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
@@ -991,27 +1004,27 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
if (uiCbpLuma & (1 << iId8x8)) {
WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
- IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
- pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+ IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx));
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
- iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
int32_t iIdx = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
- pCurLayer->pLumaQp[iMbXy],
+ g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+ pCurDqLayer->pLumaQp[iMbXy],
pCtx));
iIdx++;
}
@@ -1020,10 +1033,10 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
}
@@ -1031,19 +1044,19 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
//step1: DC
if (1 == uiCbpChroma || 2 == uiCbpChroma) {
for (i = 0; i < 2; i++) {
- if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
else
iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
- iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+ iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
}
}
//step2: AC
if (2 == uiCbpChroma) {
for (i = 0; i < 2; i++) {
- if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
else
iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
@@ -1051,23 +1064,23 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
- iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+ iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
index++;
}
}
- ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
- ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
- ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
- ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
} else {
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
}
} else {
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
}
@@ -1080,21 +1093,21 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
}
int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PBitStringAux pBsAux = pCurLayer->pBitStringAux;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBsAux = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t iMbResProperty;
int32_t i;
uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
WELS_READ_VERIFY (ParseMBTypeBSliceCabac (pCtx, pNeighAvail, uiMbType));
@@ -1103,12 +1116,12 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
int16_t pMvdCache[LIST_A][30][MV_A];
int8_t pRefIndex[LIST_A][30];
int8_t pDirect[30];
- pCurLayer->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
- WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
- WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurLayer);
+ pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
+ WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer);
+ WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurDqLayer);
WELS_READ_VERIFY (ParseInterBMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex,
pDirect));
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
} else { //Intra mode
uiMbType -= 23;
if (uiMbType > 25)
@@ -1117,7 +1130,7 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
if (25 == uiMbType) { //I_PCM
- WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!");
WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
pSlice->iLastDeltaQp = 0;
WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
@@ -1128,59 +1141,59 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
} else { //normal Intra mode
if (0 == uiMbType) { //Intra4x4
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
if (pCtx->pPps->bTransform8x8ModeFlag) {
WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
}
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
- uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
- pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
} else {
- pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
}
} else { //Intra16x16
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
- pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
- uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
- uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
- WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
- WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+ pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+ uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
+ uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
+ WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer));
}
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
- if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
- pCurLayer->pCbp[iMbXy] = uiCbp;
+ pCurDqLayer->pCbp[iMbXy] = uiCbp;
pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
- uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
- uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+ uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
+ uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
}
- if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
- if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
// Need modification when B picutre add in
bool bNeedParseTransformSize8x8Flag =
- (((IS_INTER_16x16 (pCurLayer->pMbType[iMbXy]) || IS_DIRECT (pCurLayer->pMbType[iMbXy])
- || IS_INTER_16x8 (pCurLayer->pMbType[iMbXy]) || IS_INTER_8x16 (pCurLayer->pMbType[iMbXy]))
- || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
- && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
- && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
- && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
+ (((IS_INTER_16x16 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_DIRECT (pCurDqLayer->pDec->pMbType[iMbXy])
+ || IS_INTER_16x8 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_INTER_8x16 (pCurDqLayer->pDec->pMbType[iMbXy]))
+ || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+ && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0)
&& (pCtx->pPps->bTransform8x8ModeFlag));
if (bNeedParseTransformSize8x8Flag) {
@@ -1189,7 +1202,7 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
}
}
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+ memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
@@ -1197,33 +1210,33 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
}
- pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
- pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+ pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+ pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
- if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
//step1: Luma DC
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
- I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+ I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx));
//step2: Luma AC
if (uiCbpLuma) {
for (i = 0; i < 16; i++) {
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
- 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
- pCurLayer->pLumaQp[iMbXy], pCtx));
+ 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx));
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
}
} else { //non-MB_TYPE_INTRA16x16
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
@@ -1232,27 +1245,27 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
if (uiCbpLuma & (1 << iId8x8)) {
WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
- IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
- pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+ IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx));
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
- iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
int32_t iIdx = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
- pCurLayer->pLumaQp[iMbXy],
+ g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+ pCurDqLayer->pLumaQp[iMbXy],
pCtx));
iIdx++;
}
@@ -1261,10 +1274,10 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
}
@@ -1272,19 +1285,19 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
//step1: DC
if (1 == uiCbpChroma || 2 == uiCbpChroma) {
for (i = 0; i < 2; i++) {
- if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
else
iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
- iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+ iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
}
}
//step2: AC
if (2 == uiCbpChroma) {
for (i = 0; i < 2; i++) {
- if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
else
iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
@@ -1292,23 +1305,23 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
- iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+ iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
index++;
}
}
- ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
- ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
- ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
- ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+ ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
} else {
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
}
} else {
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
}
@@ -1322,53 +1335,55 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
uint32_t uiCode;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t i;
SWelsNeighAvail uiNeighAvail;
- pCurLayer->pCbp[iMbXy] = 0;
- pCurLayer->pCbfDc[iMbXy] = 0;
- pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
+ pCurDqLayer->pCbp[iMbXy] = 0;
+ pCurDqLayer->pCbfDc[iMbXy] = 0;
+ pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
- GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
+ GetNeighborAvailMbType (&uiNeighAvail, pCurDqLayer);
WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
if (uiCode) {
int16_t pMv[2] = {0};
- pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
- memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP;
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
+
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+ bool bIsPending = GetThreadCount (pCtx) > 1;
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete
+ || bIsPending));
//predict mv
- PredPSkipMvFromNeighbor (pCurLayer, pMv);
+ PredPSkipMvFromNeighbor (pCurDqLayer, pMv);
for (i = 0; i < 16; i++) {
- ST32 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)pMv);
- ST32 (pCurLayer->pMvd[0][iMbXy][i], 0);
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)pMv);
+ ST32 (pCurDqLayer->pMvd[0][iMbXy][i], 0);
}
//if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
- // memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+ // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
//}
//reset rS
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
//for neighboring CABAC usage
@@ -1385,57 +1400,63 @@ int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
uint32_t uiCode;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t i;
SWelsNeighAvail uiNeighAvail;
- pCurLayer->pCbp[iMbXy] = 0;
- pCurLayer->pCbfDc[iMbXy] = 0;
- pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
+ pCurDqLayer->pCbp[iMbXy] = 0;
+ pCurDqLayer->pCbfDc[iMbXy] = 0;
+ pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
- GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
+ GetNeighborAvailMbType (&uiNeighAvail, pCurDqLayer);
WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
- memset (pCurLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16);
+ memset (pCurDqLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16);
+
+ bool bIsPending = GetThreadCount (pCtx) > 1;
if (uiCode) {
int16_t pMv[LIST_A][2] = { {0, 0}, { 0, 0 } };
int8_t ref[LIST_A] = { 0 };
- pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
- ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
- ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
- memset (pCurLayer->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
- memset (pCurLayer->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && ppRefPicL0[0]->bIsComplete)
- || ! (ppRefPicL1[0] && ppRefPicL1[0]->bIsComplete);
-
-
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
+ ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
+ ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
+
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
+ memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete
+ || bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending));
+
+ if (pCtx->bMbRefConcealed) {
+ SLogContext* pLogCtx = & (pCtx->sLogCtx);
+ WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
+ return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
+ }
+
+ SubMbType subMbType;
if (pSliceHeader->iDirectSpatialMvPredFlag) {
//predict direct spatial mv
- SubMbType subMbType;
int32_t ret = PredMvBDirectSpatial (pCtx, pMv, ref, subMbType);
if (ret != ERR_NONE) {
return ret;
}
} else {
//temporal direct mode
- ComputeColocated (pCtx);
- int32_t ret = PredBDirectTemporal (pCtx, pMv, ref);
+ int32_t ret = PredBDirectTemporal (pCtx, pMv, ref, subMbType);
if (ret != ERR_NONE) {
return ret;
}
@@ -1443,10 +1464,10 @@ int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
//reset rS
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
//for neighboring CABAC usage
@@ -1492,12 +1513,12 @@ int32_t WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
}
int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
PFmo pFmo = pCtx->pFmo;
int32_t iRet;
int32_t iNextMbXyIndex, iSliceIdc;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
int32_t iMbX, iMbY;
@@ -1525,6 +1546,8 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal
} else {
if (P_SLICE == pSliceHeader->eSliceType) {
pDecMbFunc = WelsDecodeMbCavlcPSlice;
+ } else if (B_SLICE == pSliceHeader->eSliceType) {
+ pDecMbFunc = WelsDecodeMbCavlcBSlice;
} else { //I_SLICE
pDecMbFunc = WelsDecodeMbCavlcISlice;
}
@@ -1541,7 +1564,7 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal
}
pCtx->eSliceType = pSliceHeader->eSliceType;
- if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
+ if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
@@ -1553,24 +1576,24 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal
WelsCalcDeqCoeffScalingList (pCtx);
iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
- iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
- iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
+ iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
+ iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
pSlice->iMbSkipRun = -1;
- iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurLayer->uiLayerDqId;
+ iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId;
- pCurLayer->iMbX = iMbX;
- pCurLayer->iMbY = iMbY;
- pCurLayer->iMbXyIndex = iNextMbXyIndex;
+ pCurDqLayer->iMbX = iMbX;
+ pCurDqLayer->iMbY = iMbY;
+ pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
do {
if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame
break;
}
- pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
+ pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
pCtx->bMbRefConcealed = false;
iRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag);
- pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed;
+ pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed;
if (iRet != ERR_NONE) {
return iRet;
}
@@ -1584,21 +1607,185 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal
} else {
++iNextMbXyIndex;
}
- iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
- iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
- pCurLayer->iMbX = iMbX;
- pCurLayer->iMbY = iMbY;
- pCurLayer->iMbXyIndex = iNextMbXyIndex;
+ iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
+ iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth;
+ pCurDqLayer->iMbX = iMbX;
+ pCurDqLayer->iMbY = iMbY;
+ pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
} while (1);
return ERR_NONE;
}
+int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx) { // Decodes the current slice MB by MB; each MB is also reconstructed, deblocked and (on picture borders) padded. Returns ERR_NONE on success, otherwise an error code.
+ PNalUnit pNalCur = pCtx->pNalCur;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PFmo pFmo = pCtx->pFmo;
+ int32_t iRet;
+ int32_t iNextMbXyIndex, iSliceIdc;
+ // Cache slice / slice-header pointers of the layer currently being decoded.
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+ PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
+ PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
+ int32_t iMbX, iMbY;
+ const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
+ int32_t iTotalMbTargetLayer = kiCountNumMb;
+ uint32_t uiEosFlag = 0;
+ PWelsDecMbFunc pDecMbFunc;
+
+ pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
+ // Select the per-MB parse function from the entropy coding mode (CABAC vs CAVLC) and the slice type (P / B / otherwise I).
+ if (pCtx->pPps->bEntropyCodingModeFlag) {
+ if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
+ pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
+ pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) { // any of these adaptive flags is rejected on the CABAC path
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+ "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
+ pCtx->iErrorCode |= dsBitstreamError;
+ return dsBitstreamError;
+ }
+ if (P_SLICE == pSliceHeader->eSliceType)
+ pDecMbFunc = WelsDecodeMbCabacPSlice;
+ else if (B_SLICE == pSliceHeader->eSliceType)
+ pDecMbFunc = WelsDecodeMbCabacBSlice;
+ else //I_SLICE. B_SLICE is being supported
+ pDecMbFunc = WelsDecodeMbCabacISlice;
+ } else {
+ if (P_SLICE == pSliceHeader->eSliceType) {
+ pDecMbFunc = WelsDecodeMbCavlcPSlice;
+ } else if (B_SLICE == pSliceHeader->eSliceType) {
+ pDecMbFunc = WelsDecodeMbCavlcBSlice;
+ } else { //I_SLICE
+ pDecMbFunc = WelsDecodeMbCavlcISlice;
+ }
+ }
+ // Install the cache-fill / neighbour-mapping callbacks that match the PPS constrained-intra-pred flag.
+ if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
+ pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
+ pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleConstrain1;
+ pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1;
+ } else {
+ pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
+ pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleNormal;
+ pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal;
+ }
+
+ pCtx->eSliceType = pSliceHeader->eSliceType;
+ if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { // CABAC only: init the contexts from slice QP / cabac_init_idc, then the decoding engine from the layer bitstream
+ int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
+ int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
+ WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
+ //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
+ pSlice->iLastDeltaQp = 0;
+ WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux)); // NOTE(review): WELS_READ_VERIFY presumably returns early on failure - macro not visible here
+ }
+ //try to calculate the dequant_coeff
+ WelsCalcDeqCoeffScalingList (pCtx);
+ // Position the layer's MB cursor (iMbX / iMbY / iMbXyIndex) on the first MB of this slice.
+ iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
+ iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
+ iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
+ pSlice->iMbSkipRun = -1;
+ iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId; // per-MB slice tag: (first MB index << 7) + layer DQ id
+
+ pCurDqLayer->iMbX = iMbX;
+ pCurDqLayer->iMbY = iMbY;
+ pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
+ // Deblocking setup: the filter is initialised only when uiDisableDeblockingFilterIdc != 1.
+ PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb;
+
+ SDeblockingFilter pFilter;
+ int32_t iFilterIdc = 1;
+ if (pSliceHeader->uiDisableDeblockingFilterIdc != 1) {
+ WelsDeblockingInitFilter (pCtx, pFilter, iFilterIdc);
+ }
+
+ do { // one iteration per macroblock of the slice
+ if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame
+ break;
+ }
+ // Tag the MB with its slice, parse it, and record whether its reference was flagged as concealed.
+ pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
+ pCtx->bMbRefConcealed = false;
+ iRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag);
+ pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed;
+ if (iRet != ERR_NONE) {
+ return iRet;
+ }
+ if (WelsTargetMbConstruction (pCtx)) { // a non-zero return is treated as a reconstruction failure
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+ "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
+ pCurDqLayer->iMbX, pCurDqLayer->iMbY, pSlice->eSliceType);
+
+ return ERR_INFO_MB_RECON_FAIL;
+ }
+ memcpy (pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex], pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex], 24); // copy this MB's 24 nzc bytes from the layer into pCtx->pDec
+ if (pCtx->eSliceType != I_SLICE) {
+ pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
+ pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
+ }
+ WelsDeblockingFilterMB (pCurDqLayer, pFilter, iFilterIdc, pDeblockMb); // NOTE(review): called unconditionally although pFilter is only initialised when uiDisableDeblockingFilterIdc != 1 - confirm the callee does not read it in that case
+ if (pCtx->uiNalRefIdc > 0) { // pad only when uiNalRefIdc > 0; presumably i.e. the picture may serve as a reference - confirm
+ if (pCurDqLayer->iMbX == 0 || pCurDqLayer->iMbX == pCurDqLayer->iMbWidth - 1 || pCurDqLayer->iMbY == 0
+ || pCurDqLayer->iMbY == pCurDqLayer->iMbHeight - 1) { // MB lies on the first/last MB column or row: pad plane 0 at full size, planes 1 and 2 at half width/height
+ PadMBLuma_c (pCurDqLayer->pDec->pData[0], pCurDqLayer->pDec->iLinesize[0], pCurDqLayer->pDec->iWidthInPixel,
+ pCurDqLayer->pDec->iHeightInPixel, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, pCurDqLayer->iMbHeight);
+ PadMBChroma_c (pCurDqLayer->pDec->pData[1], pCurDqLayer->pDec->iLinesize[1], pCurDqLayer->pDec->iWidthInPixel / 2,
+ pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth,
+ pCurDqLayer->iMbHeight);
+ PadMBChroma_c (pCurDqLayer->pDec->pData[2], pCurDqLayer->pDec->iLinesize[2], pCurDqLayer->pDec->iWidthInPixel / 2,
+ pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth,
+ pCurDqLayer->iMbHeight);
+ }
+ }
+ if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
+ pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
+ pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
+ ++pCtx->iTotalNumMbRec;
+ }
+ // Sanity check: fail if more MBs have been reconstructed than the SPS total MB count.
+ if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+ "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
+ pCtx->iTotalNumMbRec, iTotalMbTargetLayer);
+
+ return ERR_INFO_MB_NUM_EXCEED_FAIL;
+ }
+
+ ++pSlice->iTotalMbInCurSlice;
+ if (uiEosFlag) { //end of slice
+ SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]); // NOTE(review): unlike the other SET_EVENT calls in this function, this one is not guarded by GetThreadCount (pCtx) > 1 - confirm intended
+ break;
+ }
+ if (pSliceHeader->pPps->uiNumSliceGroups > 1) { // advance to the next MB: FMO lookup with multiple slice groups, otherwise the next index
+ iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
+ } else {
+ ++iNextMbXyIndex;
+ }
+ int32_t iLastMby = iMbY;
+ int32_t iLastMbx = iMbX;
+ iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
+ iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth;
+ pCurDqLayer->iMbX = iMbX;
+ pCurDqLayer->iMbY = iMbY;
+ pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
+ if (GetThreadCount (pCtx) > 1) { // more than one thread: set the ready event of row iLastMby when the MB just processed was the last of its row and the cursor moved to a later row
+ if ((iMbY > iLastMby) && (iLastMbx == pCurDqLayer->iMbWidth - 1)) {
+ SET_EVENT (&pCtx->pDec->pReadyEvent[iLastMby]);
+ }
+ }
+ } while (1);
+ if (GetThreadCount (pCtx) > 1) { // more than one thread: after leaving the loop, set the ready event of the row the MB cursor is on
+ SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]);
+ }
+ return ERR_NONE;
+}
+
int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
- SVlcTable* pVlcTable = &pCtx->sVlcTable;
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PBitStringAux pBs = pCurLayer->pBitStringAux;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ SVlcTable* pVlcTable = pCtx->pVlcTable;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
SWelsNeighAvail sNeighAvail;
@@ -1607,10 +1794,10 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
- int32_t iMbX = pCurLayer->iMbX;
- int32_t iMbY = pCurLayer->iMbY;
- const int32_t iMbXy = pCurLayer->iMbXyIndex;
- int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+ int32_t iMbX = pCurDqLayer->iMbX;
+ int32_t iMbY = pCurDqLayer->iMbY;
+ const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
int32_t i;
int32_t iRet = ERR_NONE;
uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
@@ -1618,12 +1805,12 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
int32_t iCode;
ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
- GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
- pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+ GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
uiMbType = uiCode;
@@ -1634,15 +1821,15 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
if (25 == uiMbType) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
- int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
- int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+ int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0];
+ int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1];
int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
- uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
- uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
- uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+ uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL;
+ uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC;
+ uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC;
uint8_t* pTmpBsBuf;
@@ -1653,7 +1840,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
//step 1: locating bit-stream pointer [must align into integer byte]
pBs->pCurBuf -= iIndex;
@@ -1681,27 +1868,27 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
pBs->pCurBuf += 384;
//step 3: update QP and pNonZeroCount
- pCurLayer->pLumaQp[iMbXy] = 0;
- memset (pCurLayer->pChromaQp[iMbXy], 0, sizeof (pCurLayer->pChromaQp[iMbXy]));
- memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy])); //Rec. 9.2.1 for PCM, nzc=16
+ pCurDqLayer->pLumaQp[iMbXy] = 0;
+ memset (pCurDqLayer->pChromaQp[iMbXy], 0, sizeof (pCurDqLayer->pChromaQp[iMbXy]));
+ memset (pNzc, 16, sizeof (pCurDqLayer->pNzc[iMbXy])); //Rec. 9.2.1 for PCM, nzc=16
WELS_READ_VERIFY (InitReadBits (pBs, 0));
return ERR_NONE;
} else if (0 == uiMbType) { //reference to JM
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
if (pCtx->pPps->bTransform8x8ModeFlag) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
- pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
- if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
- uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
}
}
- if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
- pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
} else {
- pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
}
//uiCbp
@@ -1717,19 +1904,19 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
else
uiCbp = g_kuiIntra4x4CbpTable400[uiCbp];
- pCurLayer->pCbp[iMbXy] = uiCbp;
+ pCurDqLayer->pCbp[iMbXy] = uiCbp;
uiCbpC = uiCbp >> 4;
uiCbpL = uiCbp & 15;
} else { //I_PCM exclude, we can ignore it
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
- pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
- uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
- uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
- WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
- WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+ pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+ uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
+ uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
+ WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer));
}
ST32A4 (&pNzc[0], 0);
@@ -1739,17 +1926,17 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
ST32A4 (&pNzc[16], 0);
ST32A4 (&pNzc[20], 0);
- if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+ if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) {
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
}
- if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+ if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
+ memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
@@ -1759,29 +1946,29 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
}
- pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
- pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+ pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+ pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
- 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+ 51)];
}
BsStartCavlc (pBs);
- if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
//step1: Luma DC
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
- pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
//step2: Luma AC
if (uiCbpL) {
for (i = 0; i < 16; i++) {
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
- g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
- pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
}
@@ -1791,15 +1978,15 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
} else { //non-MB_TYPE_INTRA16x16
- if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
- iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+ iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
- pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;
}
iIndex++;
@@ -1820,8 +2007,8 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
- pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
iIndex++;
@@ -1844,7 +2031,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
for (i = 0; i < 2; i++) { //Cb Cr
iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
- pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+ pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
}
@@ -1857,8 +2044,9 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
int32_t iIndex = 16 + (i << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
- 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
- pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+ 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+ pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
iIndex++;
@@ -1876,9 +2064,9 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
}
int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PBitStringAux pBs = pCurLayer->pBitStringAux;
- PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+ PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
int32_t iBaseModeFlag;
int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
uint32_t uiCode;
@@ -1903,7 +2091,7 @@ int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
// check whether there is left bits to read next time in case multiple slices
iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
// sub 1, for stop bit
- if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+ if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
uiEosFlag = 1;
}
if (iUsedBits > (pBs->iBits -
@@ -1917,20 +2105,20 @@ int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
}
int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
- SVlcTable* pVlcTable = &pCtx->sVlcTable;
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PBitStringAux pBs = pCurLayer->pBitStringAux;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ SVlcTable* pVlcTable = pCtx->pVlcTable;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
SWelsNeighAvail sNeighAvail;
- int32_t iMbX = pCurLayer->iMbX;
- int32_t iMbY = pCurLayer->iMbY;
- const int32_t iMbXy = pCurLayer->iMbXyIndex;
- int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+ int32_t iMbX = pCurDqLayer->iMbX;
+ int32_t iMbY = pCurDqLayer->iMbY;
+ const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
int32_t i;
int32_t iRet = ERR_NONE;
uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
@@ -1938,16 +2126,16 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
int32_t iCode;
int32_t iMbResProperty;
- GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+ GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
uiMbType = uiCode;
if (uiMbType < 5) { //inter MB type
int16_t iMotionVector[LIST_A][30][MV_A];
int8_t iRefIndex[LIST_A][30];
- pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
- WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
+ pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
+ WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer);
if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
return iRet;//abnormal
@@ -1955,13 +2143,13 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
- pCurLayer->pResidualPredFlag[iMbXy] = uiCode;
+ pCurDqLayer->pResidualPredFlag[iMbXy] = uiCode;
} else {
- pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+ pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
}
- if (pCurLayer->pResidualPredFlag[iMbXy] == 0) {
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) {
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1975,15 +2163,15 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
if (25 == uiMbType) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
- int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
- int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+ int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0];
+ int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1];
int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
- uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
- uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
- uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+ uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL;
+ uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC;
+ uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC;
uint8_t* pTmpBsBuf;
@@ -1993,7 +2181,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
//step 1: locating bit-stream pointer [must align into integer byte]
pBs->pCurBuf -= iIndex;
@@ -2022,8 +2210,8 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
pBs->pCurBuf += 384;
//step 3: update QP and pNonZeroCount
- pCurLayer->pLumaQp[iMbXy] = 0;
- pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
+ pCurDqLayer->pLumaQp[iMbXy] = 0;
+ pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0;
//Rec. 9.2.1 for PCM, nzc=16
ST32A4 (&pNzc[0], 0x10101010);
ST32A4 (&pNzc[4], 0x10101010);
@@ -2036,38 +2224,38 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
} else {
if (0 == uiMbType) {
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
if (pCtx->pPps->bTransform8x8ModeFlag) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
- pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
- if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
- uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
}
}
- if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
- pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
} else {
- pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
}
} else { //I_PCM exclude, we can ignore it
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
- pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
- uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
- uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
- WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
- if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) != ERR_NONE) {
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+ pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+ uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
+ uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
+ WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
+ if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) {
return iRet;
}
}
}
}
- if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
uiCbp = uiCode;
{
@@ -2075,29 +2263,29 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
- if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) {
uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
} else //inter
uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
}
- pCurLayer->pCbp[iMbXy] = uiCbp;
- uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
- uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+ pCurDqLayer->pCbp[iMbXy] = uiCbp;
+ uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4;
+ uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
// Need modification when B picutre add in
bool bNeedParseTransformSize8x8Flag =
- (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
- || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
- && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
- && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+ (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16)
+ || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
&& (uiCbpL > 0)
&& (pCtx->pPps->bTransform8x8ModeFlag));
if (bNeedParseTransformSize8x8Flag) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
- pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
}
}
@@ -2107,17 +2295,18 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
ST32A4 (&pNzc[12], 0);
ST32A4 (&pNzc[16], 0);
ST32A4 (&pNzc[20], 0);
- if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) {
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+ if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])
+ && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) {
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
}
- if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
int32_t iQpDelta, iId8x8, iId4x4;
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
+ memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
iQpDelta = iCode;
@@ -2125,28 +2314,28 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
}
- pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
- pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+ pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+ pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
- 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+ 51)];
}
BsStartCavlc (pBs);
- if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
//step1: Luma DC
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
- pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
//step2: Luma AC
if (uiCbpL) {
for (i = 0; i < 16; i++) {
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
- g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
- pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
}
@@ -2156,15 +2345,15 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
} else { //non-MB_TYPE_INTRA16x16
- if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
- iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+ iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
- pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;
}
iIndex++;
@@ -2180,14 +2369,14 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else { // Normal T4x4
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
- iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
- pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
iIndex++;
@@ -2209,13 +2398,13 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
//step1: DC
if (1 == uiCbpC || 2 == uiCbpC) {
for (i = 0; i < 2; i++) { //Cb Cr
- if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
else
iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
- pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+ pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
}
@@ -2224,7 +2413,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
//step2: AC
if (2 == uiCbpC) {
for (i = 0; i < 2; i++) { //Cb Cr
- if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
else
iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
@@ -2232,8 +2421,9 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
int32_t iIndex = 16 + (i << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
- 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
- pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+ 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+ pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
return iRet;//abnormal
}
iIndex++;
@@ -2251,20 +2441,20 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
}
int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PBitStringAux pBs = pCurLayer->pBitStringAux;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
intX_t iUsedBits;
- const int32_t iMbXy = pCurLayer->iMbXyIndex;
- int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+ const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
int32_t iBaseModeFlag, i;
int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
uint32_t uiCode;
- pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
- pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
if (-1 == pSlice->iMbSkipRun) {
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
@@ -2276,7 +2466,7 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
if (pSlice->iMbSkipRun--) {
int16_t iMv[2];
- pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP;
ST32A4 (&pNzc[0], 0);
ST32A4 (&pNzc[4], 0);
ST32A4 (&pNzc[8], 0);
@@ -2284,30 +2474,32 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
ST32A4 (&pNzc[16], 0);
ST32A4 (&pNzc[20], 0);
- pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
- memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+ bool bIsPending = GetThreadCount (pCtx) > 1;
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete
+ || bIsPending));
//predict iMv
- PredPSkipMvFromNeighbor (pCurLayer, iMv);
+ PredPSkipMvFromNeighbor (pCurDqLayer, iMv);
for (i = 0; i < 16; i++) {
- ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv);
+ ST32A2 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)iMv);
}
//if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
- // memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+ // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
//}
//reset rS
if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
(pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
- pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
- pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
- pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
}
}
- pCurLayer->pCbp[iMbXy] = 0;
+ pCurDqLayer->pCbp[iMbXy] = 0;
} else {
if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
@@ -2329,7 +2521,7 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
// check whether there is left bits to read next time in case multiple slices
iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
// sub 1, for stop bit
- if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+ if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
uiEosFlag = 1;
}
if (iUsedBits > (pBs->iBits -
@@ -2342,7 +2534,457 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
return ERR_NONE;
}
-void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
+int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // decode one MB of a CAVLC B slice (skipped or coded); sets uiEosFlag to 1 at the slice boundary
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+ PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+ PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
+ PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
+ intX_t iUsedBits;
+ const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
+ int32_t iBaseModeFlag, i;
+ int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+ uint32_t uiCode;
+ // Per-MB defaults; the coded-MB path (WelsActualDecodeMbCavlcBSlice) may overwrite the 8x8 transform flag.
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+ // iMbSkipRun == -1 means no run is pending, so a new mb_skip_run is read from the bitstream.
+ if (-1 == pSlice->iMbSkipRun) {
+ WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
+ pSlice->iMbSkipRun = uiCode;
+ if (-1 == pSlice->iMbSkipRun) { // a decoded value that aliases the "no run pending" sentinel is rejected
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
+ }
+ }
+ if (pSlice->iMbSkipRun--) { // non-zero run: this MB is skipped; when the run was 0 the post-decrement leaves -1 and the MB is parsed in the else branch
+ int16_t iMv[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+ int8_t ref[LIST_A] = { 0 };
+ // Skipped B MB: typed SKIP|DIRECT, and the MB's pNzc entries are cleared in six stores at offsets 0..20.
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
+ ST32A4 (&pNzc[0], 0);
+ ST32A4 (&pNzc[4], 0);
+ ST32A4 (&pNzc[8], 0);
+ ST32A4 (&pNzc[12], 0);
+ ST32A4 (&pNzc[16], 0);
+ ST32A4 (&pNzc[20], 0);
+ // Reference indices of both lists are zeroed for all 16 entries of this MB.
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
+ memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
+ bool bIsPending = GetThreadCount (pCtx) > 1; // with more than one decoder thread an incomplete reference picture is tolerated below
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete
+ || bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending));
+ // The flag above is sticky: once set it stays set; it is also raised when the first L0 or L1 reference is missing or unusable.
+ /*if (pCtx->bMbRefConcealed) {
+ SLogContext* pLogCtx = & (pCtx->sLogCtx);
+ WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
+ return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
+ }*/
+ //predict iMv
+ SubMbType subMbType;
+ if (pSliceHeader->iDirectSpatialMvPredFlag) {
+
+ //predict direct spatial mv
+ int32_t ret = PredMvBDirectSpatial (pCtx, iMv, ref, subMbType);
+ if (ret != ERR_NONE) {
+ return ret;
+ }
+ } else {
+ //temporal direct mode
+ int32_t ret = PredBDirectTemporal (pCtx, iMv, ref, subMbType);
+ if (ret != ERR_NONE) {
+ return ret;
+ }
+ }
+ // NOTE(review): iMv, ref and subMbType are not read again in this function; presumably the predictors store their results in the layer - confirm.
+ //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+ // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+ //}
+
+ //reset rS
+ if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
+ (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // skipped MB takes the QP recorded in iLastMbQp
+ for (i = 0; i < 2; i++) {
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ }
+ }
+
+ pCurDqLayer->pCbp[iMbXy] = 0;
+ } else {
+ if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+ iBaseModeFlag = uiCode;
+ } else {
+ iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+ }
+ if (!iBaseModeFlag) {
+ iRet = WelsActualDecodeMbCavlcBSlice (pCtx);
+ } else {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+ iBaseModeFlag);
+ return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+ }
+ if (iRet) { //occur error when parsing, MUST STOP decoding
+ return iRet;
+ }
+ }
+ // check whether there is left bits to read next time in case multiple slices
+ iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+ // sub 1, for stop bit
+ if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+ uiEosFlag = 1;
+ }
+ if (iUsedBits > (pBs->iBits -
+ 1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+ "WelsDecodeMbCavlcBSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+ (int64_t)iUsedBits, pBs->iBits);
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
+ }
+ return ERR_NONE;
+}
+
+int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx) { // parse a non-skipped MB of a CAVLC B slice: mb_type, prediction info, CBP, QP delta and residual; returns ERR_NONE or an error code
+ SVlcTable* pVlcTable = pCtx->pVlcTable;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PBitStringAux pBs = pCurDqLayer->pBitStringAux;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+ PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+ // Scan index range from the slice header extension; it bounds the residual coefficient reads below.
+ int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+ int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+
+ SWelsNeighAvail sNeighAvail;
+ int32_t iMbX = pCurDqLayer->iMbX;
+ int32_t iMbY = pCurDqLayer->iMbY;
+ const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
+ int32_t i;
+ int32_t iRet = ERR_NONE;
+ uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+ uint32_t uiCode;
+ int32_t iCode;
+ int32_t iMbResProperty;
+
+ GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
+ WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+ uiMbType = uiCode;
+ if (uiMbType < 23) { //inter MB type
+ int16_t iMotionVector[LIST_A][30][MV_A];
+ int8_t iRefIndex[LIST_A][30];
+ pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
+ WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer);
+
+ if ((iRet = ParseInterBInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
+ return iRet;//abnormal
+ }
+
+ if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
+ pCurDqLayer->pResidualPredFlag[iMbXy] = uiCode;
+ } else {
+ pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+ }
+
+ if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) {
+ pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+ } else {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+ }
+ } else { //intra MB type
+ uiMbType -= 23; // rebase to intra numbering used below: 0 = I4x4/I8x8, 1..24 = I16x16, 25 = I_PCM
+ if (uiMbType > 25)
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+ if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); // these I16x16 types are rejected when uiChromaFormatIdc is 0
+
+ if (25 == uiMbType) {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!");
+ int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0];
+ int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1];
+ // Offsets of this MB inside the luma plane (16x16 samples) and the chroma planes (8x8 samples).
+ int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
+ int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
+
+ uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL;
+ uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC;
+ uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC;
+
+ uint8_t* pTmpBsBuf;
+
+ int32_t i; // NOTE(review): shadows the function-level i
+ int32_t iCopySizeY = (sizeof (uint8_t) << 4);
+ int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
+
+ int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
+
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+ //step 1: locating bit-stream pointer [must align into integer byte]
+ pBs->pCurBuf -= iIndex;
+ // NOTE(review): no check is visible here that 384 bytes remain in the bitstream buffer before the copies - confirm it is enforced elsewhere.
+ //step 2: copy pixel from bit-stream into fdec [reconstruction]
+ pTmpBsBuf = pBs->pCurBuf;
+ if (!pCtx->pParam->bParseOnly) { // samples are copied only when not in parse-only mode
+ for (i = 0; i < 16; i++) { //luma
+ memcpy (pDecY, pTmpBsBuf, iCopySizeY);
+ pDecY += iDecStrideL;
+ pTmpBsBuf += 16;
+ }
+
+ for (i = 0; i < 8; i++) { //cb
+ memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+ pDecU += iDecStrideC;
+ pTmpBsBuf += 8;
+ }
+ for (i = 0; i < 8; i++) { //cr
+ memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+ pDecV += iDecStrideC;
+ pTmpBsBuf += 8;
+ }
+ }
+ // 384 = 16*16 luma + 2 * 8*8 chroma bytes; the read pointer advances even in parse-only mode.
+ pBs->pCurBuf += 384;
+
+ //step 3: update QP and pNonZeroCount
+ pCurDqLayer->pLumaQp[iMbXy] = 0;
+ pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0;
+ //Rec. 9.2.1 for PCM, nzc=16
+ ST32A4 (&pNzc[0], 0x10101010);
+ ST32A4 (&pNzc[4], 0x10101010);
+ ST32A4 (&pNzc[8], 0x10101010);
+ ST32A4 (&pNzc[12], 0x10101010);
+ ST32A4 (&pNzc[16], 0x10101010);
+ ST32A4 (&pNzc[20], 0x10101010);
+ WELS_READ_VERIFY (InitReadBits (pBs, 0)); // presumably re-primes the bit reader at the new byte position - confirm
+ return ERR_NONE;
+ } else {
+ if (0 == uiMbType) {
+ ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+ if (pCtx->pPps->bTransform8x8ModeFlag) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ }
+ }
+ if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
+ } else {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
+ }
+ } else { //I_PCM exclude, we can ignore it
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+ pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+ uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
+ uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
+ WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
+ if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) {
+ return iRet;
+ }
+ }
+ }
+ }
+ // I16x16 derives its CBP from mb_type (set above); every other MB type reads coded_block_pattern here.
+ if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
+ WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+ uiCbp = uiCode;
+ {
+ if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+ if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+ if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) {
+
+ uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
+ } else //inter
+ uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
+ }
+
+ pCurDqLayer->pCbp[iMbXy] = uiCbp;
+ uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4;
+ uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
+
+ // Needs modification when B pictures are added
+ bool bNeedParseTransformSize8x8Flag =
+ (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16)
+ || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+ && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+ && (uiCbpL > 0)
+ && (pCtx->pPps->bTransform8x8ModeFlag));
+
+ if (bNeedParseTransformSize8x8Flag) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+ pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ }
+ }
+ // Clear the stored per-MB counts; they are refilled from pNonZeroCount after residual parsing below.
+ ST32A4 (&pNzc[0], 0);
+ ST32A4 (&pNzc[4], 0);
+ ST32A4 (&pNzc[8], 0);
+ ST32A4 (&pNzc[12], 0);
+ ST32A4 (&pNzc[16], 0);
+ ST32A4 (&pNzc[20], 0);
+ if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])
+ && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) {
+ pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // no residual and no mb_qp_delta: QP is taken from iLastMbQp
+ for (i = 0; i < 2; i++) {
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+ }
+ }
+ // Residual data is present when CBP is non-zero or the MB is I16x16.
+ if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
+ int32_t iQpDelta, iId8x8, iId4x4;
+ memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+ iQpDelta = iCode;
+
+ if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+ }
+
+ pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+ pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
+ for (i = 0; i < 2; i++) {
+ pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+ pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+ 51)];
+ }
+
+ BsStartCavlc (pBs);
+
+ if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
+ //step1: Luma DC
+ if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
+ pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ return iRet;//abnormal
+ }
+ //step2: Luma AC
+ if (uiCbpL) {
+ for (i = 0; i < 16; i++) { // AC blocks start at scan index >= 1; the DC coefficients were read in step 1
+ if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
+ g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ return iRet;//abnormal
+ }
+ }
+ ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ }
+ } else { //non-MB_TYPE_INTRA16x16
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+ if (uiCbpL & (1 << iId8x8)) {
+ int32_t iIndex = (iId8x8 << 2); // first 4x4 block index of this 8x8; coefficients go to offset iId8x8 * 64
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+ g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ return iRet;
+ }
+ iIndex++;
+ }
+ } else { // 8x8 block not coded: zero its cached counts (presumably two adjacent entries per store - confirm)
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+ }
+ }
+ ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ } else { // Normal T4x4
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ if (uiCbpL & (1 << iId8x8)) {
+ int32_t iIndex = (iId8x8 << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ //Luma (DC and AC decoding together)
+ if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+ g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+ pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+ return iRet;//abnormal
+ }
+ iIndex++;
+ }
+ } else {
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+ }
+ }
+ ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ }
+ }
+
+
+ //chroma
+ //step1: DC
+ if (1 == uiCbpC || 2 == uiCbpC) { // chroma DC is read for both chroma CBP values 1 and 2
+ for (i = 0; i < 2; i++) { //Cb Cr
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
+ iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+ else
+ iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+ if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
+ pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+ return iRet;//abnormal
+ }
+ }
+ } else { // NOTE(review): empty else branch
+ }
+ //step2: AC
+ if (2 == uiCbpC) { // chroma AC is read only for chroma CBP value 2
+ for (i = 0; i < 2; i++) { //Cb Cr
+ if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
+ iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+ else
+ iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+
+ int32_t iIndex = 16 + (i << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+ 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
+ pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+ pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+ return iRet;//abnormal
+ }
+ iIndex++;
+ }
+ }
+ ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
+ ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+ ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+ ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+ }
+ BsEndCavlc (pBs);
+ }
+
+ return ERR_NONE;
+}
+
+void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_c;
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c;
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c;
@@ -2389,31 +3031,34 @@ void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
WelsBlockInit (pBlock, 8, 8, iStride, 0);
}
-bool ComputeColocated (PWelsDecoderContext pCtx) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+
+// Compute the temporal-direct scaling factor that's common
+// to all direct MBs in this slice, as per clause 8.4.1.2.3
+// of T-REC H.264 201704
+bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx) { // fills iMvScale[LIST_0][i] only when the slice uses temporal (not spatial) direct prediction; always returns true
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
if (!pSliceHeader->iDirectSpatialMvPredFlag) {
- uint32_t uiShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0];
- for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- for (uint32_t i = 0; i < uiShortRefCount; ++i) {
- int32_t iTRb = WELS_CLIP3 (-128, 127, pSliceHeader->iPicOrderCntLsb - pCtx->sRefPic.pRefList[listIdx][i]->iFramePoc);
- int32_t iTRp = WELS_CLIP3 (-128, 127,
- pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc - pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc);
- if (iTRp != 0) {
- int32_t prescale = (16384 + iAbs (iTRp / 2)) / iTRp;
- pCurSlice->iMvScale[listIdx][i] = WELS_CLIP3 (-1024, 1023, (iTRb * prescale + 32) >> 6);
- } else {
- pCurSlice->iMvScale[listIdx][i] = 0x03FFF;
+ uint32_t uiRefCount = pSliceHeader->uiRefCount[LIST_0];
+ if (pCtx->sRefPic.pRefList[LIST_1][0] != NULL) { // every factor is relative to the first LIST_1 reference; nothing is written if it is missing
+ for (uint32_t i = 0; i < uiRefCount; ++i) {
+ if (pCtx->sRefPic.pRefList[LIST_0][i] != NULL) { // missing LIST_0 entries are skipped, leaving their iMvScale untouched
+ const int32_t poc0 = pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc;
+ const int32_t poc1 = pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc;
+ const int32_t poc = pSliceHeader->iPicOrderCntLsb;
+ const int32_t td = WELS_CLIP3 (poc1 - poc0, -128, 127); // POC distance between the two references, clipped to [-128, 127]
+ if (td == 0) { // equal POCs: avoid the division below and store 256
+ pCurSlice->iMvScale[LIST_0][i] = 1 << 8;
+ } else {
+ int32_t tb = WELS_CLIP3 (poc - poc0, -128, 127); // POC distance from the current slice to the LIST_0 reference, same clipping
+ int32_t tx = (16384 + (abs (td) >> 1)) / td;
+ pCurSlice->iMvScale[LIST_0][i] = WELS_CLIP3 ((tb * tx + 32) >> 6, -1024, 1023);
+ }
}
}
}
}
- //Implement the following
- //get Mv_colocated_L1
- //and do calculation
- //iMvp[LIST_0] = Mv_colocated_L1 * (POC(cur) - POC(L0))/POC(L1) - POC(L0))
- //iMvp[LIST_1] = Mv_colocated_L1 * (POC(cur) - POC(L1))/POC(L1) - POC(L0))
return true;
}
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp
index b957872b90b..a6f2da4374f 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp
@@ -52,6 +52,7 @@
#include "decode_slice.h"
#include "error_concealment.h"
#include "memory_align.h"
+#include "wels_decoder_thread.h"
namespace WelsDec {
@@ -61,6 +62,7 @@ extern void FreePicture (PPicture pPic, CMemoryAlign* pMa);
static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, const int32_t kiSize,
const int32_t kiPicWidth, const int32_t kiPicHeight) {
+
PPicBuff pPicBuf = NULL;
int32_t iPicIdx = 0;
if (kiSize <= 0 || kiPicWidth <= 0 || kiPicHeight <= 0) {
@@ -79,7 +81,7 @@ static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, cons
if (NULL == pPicBuf->ppPic) {
pPicBuf->iCapacity = 0;
- DestroyPicBuff (&pPicBuf, pMa);
+ DestroyPicBuff (pCtx, &pPicBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
@@ -88,7 +90,7 @@ static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, cons
if (NULL == pPic) {
// init capacity first for free memory
pPicBuf->iCapacity = iPicIdx;
- DestroyPicBuff (&pPicBuf, pMa);
+ DestroyPicBuff (pCtx, &pPicBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
pPicBuf->ppPic[iPicIdx] = pPic;
@@ -122,7 +124,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
if (NULL == pPicNewBuf->ppPic) {
pPicNewBuf->iCapacity = 0;
- DestroyPicBuff (&pPicNewBuf, pMa);
+ DestroyPicBuff (pCtx, &pPicNewBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
@@ -132,7 +134,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
if (NULL == pPic) {
// Set maximum capacity as the new malloc memory at the tail
pPicNewBuf->iCapacity = iPicIdx;
- DestroyPicBuff (&pPicNewBuf, pMa);
+ DestroyPicBuff (pCtx, &pPicNewBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
pPicNewBuf->ppPic[iPicIdx] = pPic;
@@ -149,8 +151,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) {
pPicNewBuf->ppPic[i]->bUsedAsRef = false;
pPicNewBuf->ppPic[i]->bIsLongRef = false;
- pPicNewBuf->ppPic[i]->uiRefCount = 0;
- pPicNewBuf->ppPic[i]->bAvailableFlag = true;
+ pPicNewBuf->ppPic[i]->iRefCount = 0;
pPicNewBuf->ppPic[i]->bIsComplete = false;
}
// remove old PicBuf
@@ -186,13 +187,15 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
if (NULL == pPicNewBuf->ppPic) {
pPicNewBuf->iCapacity = 0;
- DestroyPicBuff (&pPicNewBuf, pMa);
+ DestroyPicBuff (pCtx, &pPicNewBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
+ ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false);
+
int32_t iPrevPicIdx = -1;
for (iPrevPicIdx = 0; iPrevPicIdx < kiOldSize; ++iPrevPicIdx) {
- if (pCtx->pPreviousDecodedPictureInDpb == pPicOldBuf->ppPic[iPrevPicIdx]) {
+ if (pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb == pPicOldBuf->ppPic[iPrevPicIdx]) {
break;
}
}
@@ -209,6 +212,17 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
iDelIdx = kiNewSize;
}
+ //update references due to allocation changes
+ //all references' references have to be reset oss-buzz 14423
+ for (int32_t i = 0; i < kiNewSize; i++) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ int32_t j = -1;
+ while (++j < MAX_DPB_COUNT && pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] != NULL) {
+ pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] = NULL;
+ }
+ }
+ }
+
for (iPicIdx = iDelIdx; iPicIdx < kiOldSize; iPicIdx++) {
if (iPrevPicIdx != iPicIdx) {
if (pPicOldBuf->ppPic[iPicIdx] != NULL) {
@@ -220,13 +234,12 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
// initialize context in queue
pPicNewBuf->iCapacity = kiNewSize;
- *ppPicBuf = pPicNewBuf;
+ * ppPicBuf = pPicNewBuf;
for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) {
pPicNewBuf->ppPic[i]->bUsedAsRef = false;
pPicNewBuf->ppPic[i]->bIsLongRef = false;
- pPicNewBuf->ppPic[i]->uiRefCount = 0;
- pPicNewBuf->ppPic[i]->bAvailableFlag = true;
+ pPicNewBuf->ppPic[i]->iRefCount = 0;
pPicNewBuf->ppPic[i]->bIsComplete = false;
}
// remove old PicBuf
@@ -242,9 +255,11 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
return ERR_NONE;
}
-void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa) {
+void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa) {
PPicBuff pPicBuf = NULL;
+ ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false);
+
if (NULL == ppPicBuf || NULL == *ppPicBuf)
return;
@@ -273,6 +288,24 @@ void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa) {
*ppPicBuf = NULL;
}
+// Reset the picture reordering status fields and the bLastGOP/iPOC fields of the picture info list; does nothing if either pointer is NULL
+void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo,
+ const bool& fullReset) {
+ if (pPictReoderingStatus != NULL && pPictInfo != NULL) {
+ int32_t pictInfoListCount = fullReset ? 16 : (pPictReoderingStatus->iLargestBufferedPicIndex + 1); // full reset touches 16 entries, otherwise only entries up to the largest buffered index
+ pPictReoderingStatus->iPictInfoIndex = 0;
+ pPictReoderingStatus->iMinPOC = IMinInt32;
+ pPictReoderingStatus->iNumOfPicts = 0;
+ pPictReoderingStatus->iLastGOPRemainPicts = 0;
+ pPictReoderingStatus->iLastWrittenPOC = IMinInt32;
+ pPictReoderingStatus->iLargestBufferedPicIndex = 0; // safe to clear here: the old value was already captured in pictInfoListCount
+ for (int32_t i = 0; i < pictInfoListCount; ++i) {
+ pPictInfo[i].bLastGOP = false;
+ pPictInfo[i].iPOC = IMinInt32;
+ }
+ }
+}
+
/*
* fill data fields in default for decoder context
*/
@@ -297,7 +330,7 @@ void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx) {
pCtx->bFreezeOutput = true;
pCtx->iFrameNum = -1;
- pCtx->iPrevFrameNum = -1;
+ pCtx->pLastDecPicInfo->iPrevFrameNum = -1;
pCtx->iErrorCode = ERR_NONE;
pCtx->pDec = NULL;
@@ -310,31 +343,91 @@ void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx) {
pCtx->pPicBuff = NULL;
- pCtx->bAvcBasedFlag = true;
- pCtx->pPreviousDecodedPictureInDpb = NULL;
- pCtx->sDecoderStatistics.iAvgLumaQp = -1;
- pCtx->sDecoderStatistics.iStatisticsLogInterval = 1000;
+ //pCtx->sSpsPpsCtx.bAvcBasedFlag = true;
+ pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL;
+ pCtx->pDecoderStatistics->iAvgLumaQp = -1;
+ pCtx->pDecoderStatistics->iStatisticsLogInterval = 1000;
pCtx->bUseScalingList = false;
- pCtx->iSpsErrorIgnored = 0;
- pCtx->iSubSpsErrorIgnored = 0;
- pCtx->iPpsErrorIgnored = 0;
- pCtx->iPPSInvalidNum = 0;
- pCtx->iPPSLastInvalidId = -1;
- pCtx->iSPSInvalidNum = 0;
- pCtx->iSPSLastInvalidId = -1;
- pCtx->iSubSPSInvalidNum = 0;
- pCtx->iSubSPSLastInvalidId = -1;
+ /*pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0;
+ pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0;
+ pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0;
+ pCtx->sSpsPpsCtx.iPPSInvalidNum = 0;
+ pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1;
+ pCtx->sSpsPpsCtx.iSPSInvalidNum = 0;
+ pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1;
+ pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0;
+ pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1;
+ */
pCtx->iFeedbackNalRefIdc = -1; //initialize
- pCtx->iPrevPicOrderCntMsb = 0;
- pCtx->iPrevPicOrderCntLsb = 0;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0;
}
/*
+* fill data fields in SPS and PPS default for decoder context
+*/
+void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx) { // resets the listed flags, counters and ids of sSpsPpsCtx in place; other members are not touched
+ sSpsPpsCtx.bSpsExistAheadFlag = false;
+ sSpsPpsCtx.bSubspsExistAheadFlag = false;
+ sSpsPpsCtx.bPpsExistAheadFlag = false;
+ sSpsPpsCtx.bAvcBasedFlag = true;
+ sSpsPpsCtx.iSpsErrorIgnored = 0;
+ sSpsPpsCtx.iSubSpsErrorIgnored = 0;
+ sSpsPpsCtx.iPpsErrorIgnored = 0;
+ sSpsPpsCtx.iPPSInvalidNum = 0;
+ sSpsPpsCtx.iPPSLastInvalidId = -1; // the "last invalid id" fields and iSeqId start at -1
+ sSpsPpsCtx.iSPSInvalidNum = 0;
+ sSpsPpsCtx.iSPSLastInvalidId = -1;
+ sSpsPpsCtx.iSubSPSInvalidNum = 0;
+ sSpsPpsCtx.iSubSPSLastInvalidId = -1;
+ sSpsPpsCtx.iSeqId = -1;
+}
+
+/*
+* fill last decoded picture info
+*/
+void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo) { // resets the six fields below of sLastDecPicInfo in place
+ sLastDecPicInfo.iPrevPicOrderCntMsb = 0;
+ sLastDecPicInfo.iPrevPicOrderCntLsb = 0;
+ sLastDecPicInfo.pPreviousDecodedPictureInDpb = NULL;
+ sLastDecPicInfo.iPrevFrameNum = -1; // same initial value WelsDecoderDefaults assigns to pLastDecPicInfo->iPrevFrameNum
+ sLastDecPicInfo.bLastHasMmco5 = false;
+ sLastDecPicInfo.uiDecodingTimeStamp = 0;
+}
+
+/*!
+* \brief copy SpsPps from one Ctx to another ctx for threaded code
+*/
+void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx) { // copies sSpsPpsCtx by value, then re-points pToCtx's active layer SPS entries at pToCtx's own SPS buffer
+ pToCtx->sSpsPpsCtx = pFromCtx->sSpsPpsCtx; // struct assignment; SPS pointers for layers used by the current AU are remapped below
+ PAccessUnit pFromCurAu = pFromCtx->pAccessUnitList; // NOTE(review): dereferenced below without a NULL check - confirm callers guarantee it
+ PSps pTmpLayerSps[MAX_LAYER_NUM];
+ for (int i = 0; i < MAX_LAYER_NUM; i++) {
+ pTmpLayerSps[i] = NULL;
+ }
+ // track the layer sps for the current au
+ for (unsigned int i = pFromCurAu->uiStartPos; i <= pFromCurAu->uiEndPos; i++) {
+ uint32_t uiDid = pFromCurAu->pNalUnitsList[i]->sNalHeaderExt.uiDependencyId; // NOTE(review): used unchecked as an index into pTmpLayerSps[MAX_LAYER_NUM] - confirm its range
+ pTmpLayerSps[uiDid] = pFromCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; // NOTE(review): reads sVclNal for every NAL in the range - presumably all are VCL NALs; confirm
+ for (unsigned int j = 0; j < MAX_SPS_COUNT + 1; ++j) {
+ if (&pFromCtx->sSpsPpsCtx.sSpsBuffer[j] == pTmpLayerSps[uiDid]) { // pointer into the source buffer: translate to the same slot in the destination buffer
+ pTmpLayerSps[uiDid] = &pToCtx->sSpsPpsCtx.sSpsBuffer[j];
+ break;
+ }
+ }
+ }
+ for (int i = 0; i < MAX_LAYER_NUM; i++) {
+ if (pTmpLayerSps[i] != NULL) { // only layers seen in this access unit are overwritten
+ pToCtx->sSpsPpsCtx.pActiveLayerSps[i] = pTmpLayerSps[i];
+ }
+ }
+}
+
+/*
* destory_mb_blocks
*/
-
/*
* get size of reference picture list in target layer incoming, = (iNumRefFrames
*/
@@ -345,6 +438,9 @@ static inline int32_t GetTargetRefListSize (PWelsDecoderContext pCtx) {
iNumRefFrames = MAX_REF_PIC_COUNT + 2;
} else {
iNumRefFrames = pCtx->pSps->iNumRefFrames + 2;
+ if (GetThreadCount (pCtx) > 1) {
+ iNumRefFrames = MAX_REF_PIC_COUNT + 1;
+ }
}
#ifdef LONG_TERM_REF
@@ -386,7 +482,9 @@ int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const
&& kiPicHeight == pCtx->iImgHeightInPixel) && (!bNeedChangePicQueue)) // have same scaled buffer
// sync update pRefList
- WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
+ if (GetThreadCount (pCtx) <= 1) {
+ WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
+ }
if (pCtx->bHaveGotMemory && (kiPicWidth == pCtx->iImgWidthInPixel && kiPicHeight == pCtx->iImgHeightInPixel)
&& pCtx->pPicBuff != NULL && pCtx->pPicBuff->iCapacity != iPicQueueSize) {
@@ -414,11 +512,11 @@ int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const
// for Recycled_Pic_Queue
PPicBuff* ppPic = &pCtx->pPicBuff;
if (NULL != ppPic && NULL != *ppPic) {
- DestroyPicBuff (ppPic, pMa);
+ DestroyPicBuff (pCtx, ppPic, pMa);
}
- pCtx->pPreviousDecodedPictureInDpb = NULL;
+ pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL;
// currently only active for LIST_0 due to have no B frames
iErr = CreatePicBuff (pCtx, &pCtx->pPicBuff, iPicQueueSize, kiPicWidth, kiPicHeight);
@@ -460,7 +558,18 @@ void WelsFreeDynamicMemory (PWelsDecoderContext pCtx) {
PPicBuff* pPicBuff = &pCtx->pPicBuff;
if (NULL != pPicBuff && NULL != *pPicBuff) {
- DestroyPicBuff (pPicBuff, pMa);
+ DestroyPicBuff (pCtx, pPicBuff, pMa);
+ }
+ if (GetThreadCount (pCtx) > 1) {
+ //prevent from double destruction of PPicBuff
+ PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pThreadCtx);
+ int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum;
+ int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
+ for (int32_t i = 0; i < threadCount; ++i) {
+ if (pThreadCtx[i - id].pCtx != NULL) {
+ pThreadCtx[i - id].pCtx->pPicBuff = NULL;
+ }
+ }
}
if (pCtx->pTempDec) {
@@ -489,7 +598,7 @@ int32_t WelsOpenDecoder (PWelsDecoderContext pCtx, SLogContext* pLogCtx) {
InitDecFuncs (pCtx, pCtx->uiCpuFlag);
// vlc tables
- InitVlcTable (&pCtx->sVlcTable);
+ InitVlcTable (pCtx->pVlcTable);
// static memory
iRet = WelsInitStaticMemory (pCtx);
@@ -704,7 +813,11 @@ int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const in
}
CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) {
- ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ if (GetThreadCount (pCtx) <= 1) {
+ ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ } else {
+ pCtx->pAccessUnitList->uiAvailUnitsNum = 1;
+ }
}
}
DecodeFinishUpdate (pCtx);
@@ -760,9 +873,15 @@ int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const in
if (IS_PARAM_SETS_NALS (pCtx->sCurNalHead.eNalUnitType)) {
iRet = ParseNonVclNal (pCtx, pNalPayload, iDstIdx - iConsumedBytes, pSrcNal - 3, iSrcIdx + 3);
}
- CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
+ if (GetThreadCount (pCtx) <= 1) {
+ CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
+ }
if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) {
- ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ if (GetThreadCount (pCtx) <= 1) {
+ ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ } else {
+ pCtx->pAccessUnitList->uiAvailUnitsNum = 1;
+ }
}
}
DecodeFinishUpdate (pCtx);
@@ -825,7 +944,12 @@ int32_t SyncPictureResolutionExt (PWelsDecoderContext pCtx, const int32_t kiMbWi
int32_t iErr = ERR_NONE;
const int32_t kiPicWidth = kiMbWidth << 4;
const int32_t kiPicHeight = kiMbHeight << 4;
-
+ //fix Bugzilla Bug1479656 reallocate temp dec picture
+ if (pCtx->pTempDec != NULL && (pCtx->pTempDec->iWidthInPixel != kiPicWidth
+ || pCtx->pTempDec->iHeightInPixel != kiPicHeight)) {
+ FreePicture (pCtx->pTempDec, pCtx->pMemAlign);
+ pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
+ }
bool bReallocFlag = false;
iErr = WelsRequestMem (pCtx, kiMbWidth, kiMbHeight, bReallocFlag); // common memory used
if (ERR_NONE != iErr) {
@@ -1072,7 +1196,7 @@ void UpdateDecStatFreezingInfo (const bool kbIdrFlag, SDecoderStatistics* pDecSt
void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx) {
PDqLayer pCurDq = pCtx->pCurDqLayer;
PPicture pPic = pCtx->pDec;
- SDecoderStatistics* pDecStat = &pCtx->sDecoderStatistics;
+ SDecoderStatistics* pDecStat = pCtx->pDecoderStatistics;
if (pDecStat->iAvgLumaQp == -1) //first correct frame received
pDecStat->iAvgLumaQp = 0;
@@ -1114,7 +1238,7 @@ void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx) {
//update decoder statistics information
void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput) {
if (pCtx->bFreezeOutput)
- UpdateDecStatFreezingInfo (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag, &pCtx->sDecoderStatistics);
+ UpdateDecStatFreezingInfo (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag, pCtx->pDecoderStatistics);
else if (kbOutput)
UpdateDecStatNoFreezingInfo (pCtx);
}
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp
index b286aa37ed6..32da38e382c 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp
@@ -77,11 +77,11 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
- if ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
- || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight)) {
- pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
- pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
- pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
+ if ((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth)
+ || (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight)) {
+ pCtx->pDecoderStatistics->uiResolutionChangeTimes++;
+ pCtx->pDecoderStatistics->uiWidth = kiActualWidth;
+ pCtx->pDecoderStatistics->uiHeight = kiActualHeight;
}
UpdateDecStatNoFreezingInfo (pCtx);
}
@@ -194,8 +194,9 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
"DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ",
pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight);
bFrameCompleteFlag = false; //return later after output buffer is done
- if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice
+ if (pCtx->bInstantDecFlag) { //no-delay decoding, wait for new slice
return ERR_INFO_MB_NUM_INADEQUATE;
+ }
} else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag
&& (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done
pCtx->pDec->bIsComplete = true;
@@ -219,10 +220,30 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2;
ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
+ for (int i = 0; i < 3; ++i) {
+ pDstInfo->pDst[i] = ppDst[i];
+ }
pDstInfo->iBufferStatus = 1;
-
- bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
- || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+ if (GetThreadCount (pCtx) > 1 && pPic->bIsComplete == false) {
+ pPic->bIsComplete = true;
+ }
+ if (GetThreadCount (pCtx) > 1) {
+ uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
+ for (uint32_t i = 0; i < uiMbHeight; ++i) {
+ SET_EVENT (&pCtx->pDec->pReadyEvent[i]);
+ }
+ }
+ bool bOutResChange = false;
+ if (GetThreadCount (pCtx) <= 1 || pCtx->pLastThreadCtx == NULL) {
+ bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
+ || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+ } else {
+ if (pCtx->pLastThreadCtx != NULL) {
+ PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx);
+ bOutResChange = (pLastThreadCtx->pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
+ || (pLastThreadCtx->pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+ }
+ }
pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth;
pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight;
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete
@@ -250,11 +271,11 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
pCtx->iMbNum = pPic->iMbNum;
pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum;
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
- if (pDstInfo->iBufferStatus && ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
- || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight))) {
- pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
- pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
- pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
+ if (pDstInfo->iBufferStatus && ((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth)
+ || (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight))) {
+ pCtx->pDecoderStatistics->uiResolutionChangeTimes++;
+ pCtx->pDecoderStatistics->uiWidth = kiActualWidth;
+ pCtx->pDecoderStatistics->uiHeight = kiActualHeight;
}
UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0);
}
@@ -381,10 +402,13 @@ void CreateImplicitWeightTable (PWelsDecoderContext pCtx) {
if (pCurDqLayer->bUseWeightedBiPredIdc && pSliceHeader->pPps->uiWeightedBipredIdc == 2) {
int32_t iPoc = pSliceHeader->iPicOrderCntLsb;
- if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1
- && pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc + pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc == 2 * iPoc) {
- pCurDqLayer->bUseWeightedBiPredIdc = false;
- return;
+ //fix Bugzilla 1485229 check if pointers are NULL
+ if (pCtx->sRefPic.pRefList[LIST_0][0] && pCtx->sRefPic.pRefList[LIST_1][0]) {
+ if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1
+ && pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc + pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc == 2 * iPoc) {
+ pCurDqLayer->bUseWeightedBiPredIdc = false;
+ return;
+ }
}
pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom = 5;
@@ -524,8 +548,8 @@ int32_t ParseDecRefPicMarking (PWelsDecoderContext pCtx, PBitStringAux pBs, PSli
WELS_VERIFY_RETURN_IF (-1, (!bAllowMmco5 || bMmco5Exist));
bMmco5Exist = true;
- pCtx->iPrevPicOrderCntLsb = 0;
- pCtx->iPrevPicOrderCntMsb = 0;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0;
pSh->iPicOrderCntLsb = 0;
if (pCtx->pSliceHeader)
pCtx->pSliceHeader->iPicOrderCntLsb = 0;
@@ -843,8 +867,9 @@ void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatis
* Parse slice header of bitstream in avc for storing data structure
*/
int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) {
- PNalUnit const kpCurNal = pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
- 1];
+ PNalUnit const kpCurNal =
+ pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
+ 1];
PNalUnitHeaderExt pNalHeaderExt = NULL;
PSliceHeader pSliceHead = NULL;
@@ -921,22 +946,22 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
iPpsId = uiCode;
//add check PPS available here
- if (pCtx->bPpsAvailFlags[iPpsId] == false) {
- pCtx->sDecoderStatistics.iPpsReportErrorNum++;
- if (pCtx->iPPSLastInvalidId != iPpsId) {
+ if (pCtx->sSpsPpsCtx.bPpsAvailFlags[iPpsId] == false) {
+ pCtx->pDecoderStatistics->iPpsReportErrorNum++;
+ if (pCtx->sSpsPpsCtx.iPPSLastInvalidId != iPpsId) {
WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId,
- pCtx->iPPSLastInvalidId, pCtx->iPPSInvalidNum);
- pCtx->iPPSLastInvalidId = iPpsId;
- pCtx->iPPSInvalidNum = 0;
+ pCtx->sSpsPpsCtx.iPPSLastInvalidId, pCtx->sSpsPpsCtx.iPPSInvalidNum);
+ pCtx->sSpsPpsCtx.iPPSLastInvalidId = iPpsId;
+ pCtx->sSpsPpsCtx.iPPSInvalidNum = 0;
} else {
- pCtx->iPPSInvalidNum++;
+ pCtx->sSpsPpsCtx.iPPSInvalidNum++;
}
pCtx->iErrorCode |= dsNoParamSets;
return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID);
}
- pCtx->iPPSLastInvalidId = -1;
+ pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1;
- pPps = &pCtx->sPpsBuffer[iPpsId];
+ pPps = &pCtx->sSpsPpsCtx.sPpsBuffer[iPpsId];
if (pPps->uiNumSliceGroups == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced");
@@ -945,38 +970,38 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
}
if (kbExtensionFlag) {
- pSubsetSps = &pCtx->sSubsetSpsBuffer[pPps->iSpsId];
+ pSubsetSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pPps->iSpsId];
pSps = &pSubsetSps->sSps;
- if (pCtx->bSubspsAvailFlags[pPps->iSpsId] == false) {
- pCtx->sDecoderStatistics.iSubSpsReportErrorNum++;
- if (pCtx->iSubSPSLastInvalidId != pPps->iSpsId) {
+ if (pCtx->sSpsPpsCtx.bSubspsAvailFlags[pPps->iSpsId] == false) {
+ pCtx->pDecoderStatistics->iSubSpsReportErrorNum++;
+ if (pCtx->sSpsPpsCtx.iSubSPSLastInvalidId != pPps->iSpsId) {
WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
- pCtx->iSubSPSLastInvalidId, pCtx->iSubSPSInvalidNum);
- pCtx->iSubSPSLastInvalidId = pPps->iSpsId;
- pCtx->iSubSPSInvalidNum = 0;
+ pCtx->sSpsPpsCtx.iSubSPSLastInvalidId, pCtx->sSpsPpsCtx.iSubSPSInvalidNum);
+ pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = pPps->iSpsId;
+ pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0;
} else {
- pCtx->iSubSPSInvalidNum++;
+ pCtx->sSpsPpsCtx.iSubSPSInvalidNum++;
}
pCtx->iErrorCode |= dsNoParamSets;
return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
}
- pCtx->iSubSPSLastInvalidId = -1;
+ pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1;
} else {
- if (pCtx->bSpsAvailFlags[pPps->iSpsId] == false) {
- pCtx->sDecoderStatistics.iSpsReportErrorNum++;
- if (pCtx->iSPSLastInvalidId != pPps->iSpsId) {
+ if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId] == false) {
+ pCtx->pDecoderStatistics->iSpsReportErrorNum++;
+ if (pCtx->sSpsPpsCtx.iSPSLastInvalidId != pPps->iSpsId) {
WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
- pCtx->iSPSLastInvalidId, pCtx->iSPSInvalidNum);
- pCtx->iSPSLastInvalidId = pPps->iSpsId;
- pCtx->iSPSInvalidNum = 0;
+ pCtx->sSpsPpsCtx.iSPSLastInvalidId, pCtx->sSpsPpsCtx.iSPSInvalidNum);
+ pCtx->sSpsPpsCtx.iSPSLastInvalidId = pPps->iSpsId;
+ pCtx->sSpsPpsCtx.iSPSInvalidNum = 0;
} else {
- pCtx->iSPSInvalidNum++;
+ pCtx->sSpsPpsCtx.iSPSInvalidNum++;
}
pCtx->iErrorCode |= dsNoParamSets;
return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
}
- pCtx->iSPSLastInvalidId = -1;
- pSps = &pCtx->sSpsBuffer[pPps->iSpsId];
+ pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1;
+ pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId];
}
pSliceHead->iPpsId = iPpsId;
pSliceHead->iSpsId = pPps->iSpsId;
@@ -1046,16 +1071,18 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
//Calculate poc if necessary
int32_t pocLsb = pSliceHead->iPicOrderCntLsb;
if (pSliceHead->bIdrFlag || kpCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) {
- pCtx->iPrevPicOrderCntMsb = 0;
- pCtx->iPrevPicOrderCntLsb = 0;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0;
}
int32_t pocMsb;
- if (pocLsb < pCtx->iPrevPicOrderCntLsb && pCtx->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2)
- pocMsb = pCtx->iPrevPicOrderCntMsb + iMaxPocLsb;
- else if (pocLsb > pCtx->iPrevPicOrderCntLsb && pocLsb - pCtx->iPrevPicOrderCntLsb > iMaxPocLsb / 2)
- pocMsb = pCtx->iPrevPicOrderCntMsb - iMaxPocLsb;
+ if (pocLsb < pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb
+ && pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2)
+ pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb + iMaxPocLsb;
+ else if (pocLsb > pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb
+ && pocLsb - pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb > iMaxPocLsb / 2)
+ pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb - iMaxPocLsb;
else
- pocMsb = pCtx->iPrevPicOrderCntMsb;
+ pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb;
pSliceHead->iPicOrderCntLsb = pocMsb + pocLsb;
if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
@@ -1063,8 +1090,8 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
}
if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
- pCtx->iPrevPicOrderCntLsb = pocLsb;
- pCtx->iPrevPicOrderCntMsb = pocMsb;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = pocLsb;
+ pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = pocMsb;
}
//End of Calculating poc
} else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) {
@@ -1373,7 +1400,7 @@ bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst
pNalHdrExtS = &kpSrc->sNalHeaderExt;
pShExtD = &kppDst->sNalData.sVclNal.sSliceHeaderExt;
pPrefixS = &kpSrc->sNalData.sPrefixNal;
- pSps = &pCtx->sSpsBuffer[pCtx->sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId];
+ pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId];
pNalHdrExtD->uiDependencyId = pNalHdrExtS->uiDependencyId;
pNalHdrExtD->uiQualityId = pNalHdrExtS->uiQualityId;
@@ -1435,7 +1462,7 @@ int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
if (uiActualIdx ==
pCurAu->uiActualUnitsNum) { // no found IDR nal within incoming AU, need exit to avoid mosaic issue, 11/19/2009
- pCtx->sDecoderStatistics.uiIDRLostNum++;
+ pCtx->pDecoderStatistics->uiIDRLostNum++;
if (!pCtx->bParamSetsLostFlag)
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
"UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU.");
@@ -1457,7 +1484,6 @@ int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) {
int32_t i = 0;
-
WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0))
pCtx->sMb.iMbWidth = (kiMaxWidth + 15) >> 4;
pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4;
@@ -1503,7 +1529,8 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
sizeof (
bool),
"pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
- pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
+ pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ bool),
"pCtx->sMb.pTransformSize8x8Flag[]");
pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int8_t) * 2,
@@ -1514,9 +1541,11 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t),
"pCtx->sMb.pCbfDc[]");
- pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
+ pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ int8_t) * 24,
"pCtx->sMb.pNzc[]");
- pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
+ pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ int8_t) * 24,
"pCtx->sMb.pNzcRs[]");
pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
pCtx->sMb.iMbHeight *
@@ -1534,20 +1563,24 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
"pCtx->sMb.pChromaPredMode[]");
pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pCbp[]");
- pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
+ pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
+ pCtx->sMb.iMbHeight *
sizeof (
uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]");
pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t),
"pCtx->sMb.pSliceIdc[]"); // using int32_t for slice_idc, 4/21/2010
- pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
+ pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ int8_t),
"pCtx->sMb.pResidualPredFlag[]");
- pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
- int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
+ pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
+ sizeof (
+ int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
bool),
"pCtx->sMb.pMbCorrectlyDecodedFlag[]");
- pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
+ pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ bool),
"pCtx->pMbRefConcealedFlag[]");
// check memory block valid due above allocated..
@@ -1594,6 +1627,8 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
return ERR_NONE;
}
+
+
void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
int32_t i = 0;
CMemoryAlign* pMa = pCtx->pMemAlign;
@@ -1822,9 +1857,9 @@ void ForceClearCurrentNal (PAccessUnit pAu) {
}
void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) {
- pCtx->bSpsExistAheadFlag = false;
- pCtx->bSubspsExistAheadFlag = false;
- pCtx->bPpsExistAheadFlag = false;
+ pCtx->sSpsPpsCtx.bSpsExistAheadFlag = false;
+ pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = false;
+ pCtx->sSpsPpsCtx.bPpsExistAheadFlag = false;
// Force clear the AU list
pCtx->pAccessUnitList->uiAvailUnitsNum = 0;
@@ -2098,14 +2133,14 @@ int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) {
return iRet;
pCtx->pAccessUnitList->uiStartPos = 0;
- if (!pCtx->bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) {
+ if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) {
pCtx->iErrorCode |= dsBitstreamError;
return dsBitstreamError;
}
//check current AU has only one layer or not
//If YES, can use deblocking based on AVC
- if (!pCtx->bAvcBasedFlag) {
+ if (!pCtx->sSpsPpsCtx.bAvcBasedFlag) {
CheckOnlyOneLayerInAu (pCtx);
}
@@ -2116,8 +2151,8 @@ void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) {
//save previous header info
PAccessUnit pCurAu = pCtx->pAccessUnitList;
PNalUnit pCurNal = pCurAu->pNalUnitsList[pCurAu->uiEndPos];
- memcpy (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
- memcpy (&pCtx->sLastSliceHeader,
+ memcpy (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
+ memcpy (&pCtx->pLastDecPicInfo->sLastSliceHeader,
&pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader, sizeof (SSliceHeader));
// uninitialize context of current access unit and rbsp buffer clean
ResetCurrentAccessUnit (pCtx);
@@ -2144,7 +2179,7 @@ static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
}
int iMaxActiveLayer = 0, iMaxCurrentLayer = 0;
for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
- if (pCtx->pActiveLayerSps[i] != NULL) {
+ if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] != NULL) {
iMaxActiveLayer = i;
break;
}
@@ -2156,37 +2191,39 @@ static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
}
}
if ((iMaxCurrentLayer != iMaxActiveLayer)
- || (pTmpLayerSps[iMaxCurrentLayer] != pCtx->pActiveLayerSps[iMaxActiveLayer])) {
+ || (pTmpLayerSps[iMaxCurrentLayer] != pCtx->sSpsPpsCtx.pActiveLayerSps[iMaxActiveLayer])) {
bNewSeq = true;
}
// fill active sps if the current sps is not null while active layer is null
if (!bNewSeq) {
for (int i = 0; i < MAX_LAYER_NUM; i++) {
- if (pCtx->pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) {
- pCtx->pActiveLayerSps[i] = pTmpLayerSps[i];
+ if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) {
+ pCtx->sSpsPpsCtx.pActiveLayerSps[i] = pTmpLayerSps[i];
}
}
} else {
// UpdateActiveLayerSps if new sequence start
- memcpy (&pCtx->pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
+ memcpy (&pCtx->sSpsPpsCtx.pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
}
return bNewSeq;
}
static void WriteBackActiveParameters (PWelsDecoderContext pCtx) {
- if (pCtx->iOverwriteFlags & OVERWRITE_PPS) {
- memcpy (&pCtx->sPpsBuffer[pCtx->sPpsBuffer[MAX_PPS_COUNT].iPpsId], &pCtx->sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps));
+ if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS) {
+ memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT].iPpsId],
+ &pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps));
}
- if (pCtx->iOverwriteFlags & OVERWRITE_SPS) {
- memcpy (&pCtx->sSpsBuffer[pCtx->sSpsBuffer[MAX_SPS_COUNT].iSpsId], &pCtx->sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps));
+ if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS) {
+ memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT].iSpsId],
+ &pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps));
pCtx->bNewSeqBegin = true;
}
- if (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) {
- memcpy (&pCtx->sSubsetSpsBuffer[pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId],
- &pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps));
+ if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS) {
+ memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId],
+ &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps));
pCtx->bNewSeqBegin = true;
}
- pCtx->iOverwriteFlags = OVERWRITE_NONE;
+ pCtx->sSpsPpsCtx.iOverwriteFlags = OVERWRITE_NONE;
}
/*
@@ -2205,22 +2242,19 @@ void DecodeFinishUpdate (PWelsDecoderContext pCtx) {
}
/*
- * ConstructAccessUnit
- * construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to
- * joint a collective access unit.
- * parameter\
- * buf: bitstream data buffer
- * bit_len: size in bit length of data
- * buf_len: size in byte length of data
- * coded_au: mark an Access Unit decoding finished
- * return:
- * 0 - success; otherwise returned error_no defined in error_no.h
- */
-int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
- int32_t iErr;
+* WelsDecodeInitAccessUnitStart
+* reset per-AU flags, detect a new sequence begin and start decoding of the current access unit
+* pCtx: decoder context; bAuReadyFlag, bLastHasMmco5 and bNewSeqBegin are updated and
+* pSps/pPps are taken from the NAL unit at the start position of the access unit
+* pDstInfo: output buffer info
+* return:
+* 0 - success; otherwise returned error_no defined in error_no.h
+*/
+int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) {
+ int32_t iErr = ERR_NONE;
PAccessUnit pCurAu = pCtx->pAccessUnitList;
pCtx->bAuReadyFlag = false;
- pCtx->bLastHasMmco5 = false;
+ pCtx->pLastDecPicInfo->bLastHasMmco5 = false;
bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx);
pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin;
iErr = WelsDecodeAccessUnitStart (pCtx);
@@ -2240,17 +2274,82 @@ int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferI
pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps;
+ return iErr;
+}
+
+/*
+* AllocPicBuffOnNewSeqBegin
+* check and (re)allocate picture buffers on new sequence begin
+* return:
+* 0 - success; otherwise returned error_no defined in error_no.h
+*/
+int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx) {
//try to allocate or relocate DPB memory only when new sequence is coming.
- if (pCtx->bNewSeqBegin) {
+ if (GetThreadCount (pCtx) <= 1) {
WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL
- iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
+ }
+ int32_t iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
+
+ if (ERR_NONE != iErr) {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", iErr);
+ return iErr;
+ }
+
+ return iErr;
+}
+/*
+* InitConstructAccessUnit
+* Initialization before constructing an access unit for the given input bitstream (maybe a partial NAL unit; one or more
+* units are joined into a collective access unit); picture buffers are (re)allocated when a new sequence begins.
+* parameter\
+* pDstInfo (SBufferInfo*): buffer info
+* return:
+* 0 - success; otherwise returned error_no defined in error_no.h
+*/
+int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) {
+ int32_t iErr = ERR_NONE;
+
+ iErr = WelsDecodeInitAccessUnitStart (pCtx, pDstInfo);
+ if (ERR_NONE != iErr) {
+ return iErr;
+ }
+ if (pCtx->bNewSeqBegin) {
+ iErr = AllocPicBuffOnNewSeqBegin (pCtx);
if (ERR_NONE != iErr) {
- WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", iErr);
return iErr;
}
}
+ return iErr;
+}
+
+/*
+ * ConstructAccessUnit
+ * construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to
+ * joint a collective access unit.
+ * parameter\
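+ * pCtx: decoder context; its pAccessUnitList holds the NAL units to decode
+ * ppDst: output picture pointers, forwarded to DecodeCurrentAccessUnit
+ * pDstInfo: output buffer info, forwarded to InitConstructAccessUnit (thread count <= 1 only)
+ * and to DecodeCurrentAccessUnit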
+ * return:
+ * 0 - success; otherwise returned error_no defined in error_no.h
+ */
+int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+ int32_t iErr = ERR_NONE;
+ if (GetThreadCount (pCtx) <= 1) {
+ iErr = InitConstructAccessUnit (pCtx, pDstInfo);
+ if (ERR_NONE != iErr) {
+ return iErr;
+ }
+ }
+ if (pCtx->pCabacDecEngine == NULL) {
+ pCtx->pCabacDecEngine = (SWelsCabacDecEngine*)pCtx->pMemAlign->WelsMallocz (sizeof (SWelsCabacDecEngine),
+ "pCtx->pCabacDecEngine");
+ WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, (NULL == pCtx->pCabacDecEngine))
+ }
+
iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo);
WelsDecodeAccessUnitEnd (pCtx);
@@ -2317,12 +2416,14 @@ void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pS
pCtx->bUsedAsRef = false;
pCtx->iFrameNum = pSh->iFrameNum;
- UpdateDecoderStatisticsForActiveParaset (& (pCtx->sDecoderStatistics),
- pSps, pPps);
+ UpdateDecoderStatisticsForActiveParaset (pCtx->pDecoderStatistics, pSps, pPps);
}
int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
int32_t iRet = ERR_NONE;
+ if (GetThreadCount (pCtx) > 1 && pCtx->bNewSeqBegin) {
+ WelsResetRefPic (pCtx);
+ }
if (pCtx->eSliceType == B_SLICE) {
iRet = WelsInitBSliceRefList (pCtx, iPoc);
CreateImplicitWeightTable (pCtx);
@@ -2377,13 +2478,27 @@ void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
* Decode current access unit when current AU is completed.
*/
int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
- int32_t iRefCount[LIST_A];
- PNalUnit pNalCur = NULL;
+ PNalUnit pNalCur = pCtx->pNalCur = NULL;
PAccessUnit pCurAu = pCtx->pAccessUnitList;
int32_t iIdx = pCurAu->uiStartPos;
int32_t iEndIdx = pCurAu->uiEndPos;
+ //get current thread ctx
+ PWelsDecoderThreadCTX pThreadCtx = NULL;
+ if (pCtx->pThreadCtx != NULL) {
+ pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx;
+ }
+ //get last thread ctx
+ PWelsDecoderThreadCTX pLastThreadCtx = NULL;
+ if (pCtx->pLastThreadCtx != NULL) {
+ pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx);
+ if (pLastThreadCtx->pDec == NULL) {
+ pLastThreadCtx->pDec = PrefetchLastPicForThread (pCtx->pPicBuff,
+ pLastThreadCtx->iPicBuffIdx);
+ }
+ }
+ int32_t iThreadCount = GetThreadCount (pCtx);
int32_t iPpsId = 0;
int32_t iRet = ERR_NONE;
@@ -2393,12 +2508,12 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4;
int16_t iLastIdD = -1, iLastIdQ = -1;
int16_t iCurrIdD = 0, iCurrIdQ = 0;
- uint8_t uiNalRefIdc = 0;
+ pCtx->uiNalRefIdc = 0;
bool bFreshSliceAvailable =
true; // Another fresh slice comingup for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008
//update pCurDqLayer at the starting of AU decoding
- if (pCtx->bInitialDqLayersMem) {
+ if (pCtx->bInitialDqLayersMem || pCtx->pCurDqLayer == NULL) {
pCtx->pCurDqLayer = pCtx->pDqLayersList[0];
}
@@ -2411,7 +2526,47 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
PSliceHeaderExt pShExt = NULL;
PSliceHeader pSh = NULL;
+ if (pLastThreadCtx != NULL) {
+ pSh = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+ if (pSh->iFirstMbInSlice == 0) {
+ if (pLastThreadCtx->pCtx->pDec != NULL && pLastThreadCtx->pCtx->pDec->bIsUngroupedMultiSlice) {
+ WAIT_EVENT (&pLastThreadCtx->sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE);
+ }
+ pCtx->pDec = NULL;
+ pCtx->iTotalNumMbRec = 0;
+ } else if (pLastThreadCtx->pCtx->pDec != NULL) {
+ if (pSh->iFrameNum == pLastThreadCtx->pCtx->pDec->iFrameNum
+ && pSh->iPicOrderCntLsb == pLastThreadCtx->pCtx->pDec->iFramePoc) {
+ WAIT_EVENT (&pLastThreadCtx->sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE);
+ pCtx->pDec = pLastThreadCtx->pCtx->pDec;
+ pCtx->pDec->bIsUngroupedMultiSlice = true;
+ pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
+ pCtx->iTotalNumMbRec = pLastThreadCtx->pCtx->iTotalNumMbRec;
+ }
+ }
+ }
+ bool isNewFrame = true;
+ if (iThreadCount > 1) {
+ isNewFrame = pCtx->pDec == NULL;
+ }
if (pCtx->pDec == NULL) {
+ if (pLastThreadCtx != NULL && iIdx == 0) {
+ pLastThreadCtx->pDec->bUsedAsRef = pLastThreadCtx->pCtx->uiNalRefIdc > 0;
+ if (pLastThreadCtx->pDec->bUsedAsRef) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ uint32_t i = 0;
+ while (i < MAX_DPB_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) {
+ pLastThreadCtx->pDec->pRefPic[listIdx][i] = pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i];
+ ++i;
+ }
+ }
+ pLastThreadCtx->pCtx->sTmpRefPic = pLastThreadCtx->pCtx->sRefPic;
+ WelsMarkAsRef (pLastThreadCtx->pCtx, pLastThreadCtx->pDec);
+ pCtx->sRefPic = pLastThreadCtx->pCtx->sTmpRefPic;
+ } else {
+ pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
+ }
+ }
pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
if (pCtx->iTotalNumMbRec != 0)
pCtx->iTotalNumMbRec = 0;
@@ -2424,17 +2579,33 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
pCtx->iErrorCode |= dsOutOfMemory;
return ERR_INFO_REF_COUNT_OVERFLOW;
}
+ if (pThreadCtx != NULL) {
+ pCtx->pDec->bIsUngroupedMultiSlice = false;
+ pThreadCtx->pDec = pCtx->pDec;
+ if (iThreadCount > 1) ++pCtx->pDec->iRefCount;
+ uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
+ for (uint32_t i = 0; i < uiMbHeight; ++i) {
+ RESET_EVENT (&pCtx->pDec->pReadyEvent[i]);
+ }
+ }
pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
} else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start
pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
}
pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp;
+ pCtx->pDec->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
+ if (pThreadCtx != NULL) {
+ pThreadCtx->iPicBuffIdx = pCtx->pDec->iPicBuffIdx;
+ pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag = pCtx->pDec->pMbCorrectlyDecodedFlag;
+ }
if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode
for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i)
memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
+ memset (pCtx->pDec->pRefPic[LIST_0], 0, sizeof (PPicture) * MAX_DPB_COUNT);
+ memset (pCtx->pDec->pRefPic[LIST_1], 0, sizeof (PPicture) * MAX_DPB_COUNT);
pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
pCtx->pDec->iMbEcedNum = 0;
pCtx->pDec->iMbEcedPropNum = 0;
@@ -2465,6 +2636,7 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
pCtx->pDec->iFrameNum = pSh->iFrameNum;
pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2
pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag;
+ pCtx->pDec->eSliceType = pSh->eSliceType;
memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage
pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag;
@@ -2472,7 +2644,7 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
pLayerInfo.sSliceInLayer.iLastMbQp = pSh->iSliceQp;
dq_cur->pBitStringAux = &pNalCur->sNalData.sVclNal.sSliceBitsRead;
- uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
+ pCtx->uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
iPpsId = pSh->iPpsId;
@@ -2497,11 +2669,9 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
bFreshSliceAvailable = (iCurrIdD != iLastIdD
|| iCurrIdQ != iLastIdQ); // do not need condition of (first_mb == 0) due multiple slices might be disorder
+
WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps);
- if (iCurrIdQ == BASE_QUALITY_ID) {
- ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount));
- }
if ((iLastIdD < 0) || //case 1: first layer
(iLastIdD == iCurrIdD)) { //case 2: same uiDId
@@ -2511,11 +2681,35 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag
|| (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR);
// Subclause 8.2.5.2 Decoding process for gaps in frame_num
+ int32_t iPrevFrameNum = pCtx->pLastDecPicInfo->iPrevFrameNum;
+ if (pLastThreadCtx != NULL) {
+ if (pCtx->bNewSeqBegin) {
+ iPrevFrameNum = 0;
+ } else if (pLastThreadCtx->pDec != NULL) {
+ if (pLastThreadCtx->pDec->uiTimeStamp == pCtx->uiTimeStamp - 1) {
+ iPrevFrameNum = pLastThreadCtx->pDec->iFrameNum;
+ if (iPrevFrameNum == -1) iPrevFrameNum = pLastThreadCtx->pCtx->iFrameNum;
+ } else {
+ int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
+ for (int32_t i = 0; i < iThreadCount; ++i) {
+ if (pThreadCtx[i - id].pCtx->uiTimeStamp == pCtx->uiTimeStamp - 1) {
+ if (pThreadCtx[i - id].pDec != NULL) iPrevFrameNum = pThreadCtx[i - id].pDec->iFrameNum;
+ if (iPrevFrameNum == -1) iPrevFrameNum = pThreadCtx[i - id].pCtx->iFrameNum;
+ break;
+ }
+ }
+ }
+ } else {
+ iPrevFrameNum = pCtx->bNewSeqBegin ? 0 : pLastThreadCtx->pCtx->iFrameNum;
+ }
+ }
if (!kbIdrFlag &&
- pSh->iFrameNum != pCtx->iPrevFrameNum &&
- pSh->iFrameNum != ((pCtx->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - 1))) {
+ pSh->iFrameNum != iPrevFrameNum &&
+ pSh->iFrameNum != ((iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) -
+ 1))) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
- "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", pCtx->iPrevFrameNum,
+ "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d",
+ iPrevFrameNum,
pSh->iFrameNum);
bAllRefComplete = false;
@@ -2531,8 +2725,8 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
}
}
- if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) {
- iRet = InitRefPicList (pCtx, uiNalRefIdc, pSh->iPicOrderCntLsb);
+ if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID && isNewFrame) {
+ iRet = InitRefPicList (pCtx, pCtx->uiNalRefIdc, pSh->iPicOrderCntLsb);
if (iRet) {
pCtx->bRPLRError = true;
bAllRefComplete = false; // RPLR error, set ref pictures complete flag false
@@ -2547,8 +2741,19 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
}
}
}
-
- iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
+ //calculate Colocated mv scaling factor for temporal direct prediction
+ if (pSh->eSliceType == B_SLICE && !pSh->iDirectSpatialMvPredFlag)
+ ComputeColocatedTemporalScaling (pCtx);
+
+ if (iThreadCount > 1) {
+ if (iIdx == 0) {
+ memset (&pCtx->lastReadyHeightOffset[0][0], -1, LIST_A * MAX_REF_PIC_COUNT * sizeof (int16_t));
+ SET_EVENT (&pThreadCtx->sSliceDecodeStart);
+ }
+ iRet = WelsDecodeAndConstructSlice (pCtx);
+ } else {
+ iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
+ }
//Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case
if (iRet != ERR_NONE) {
@@ -2564,17 +2769,19 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
}
}
- if (bReconstructSlice) {
+ if (iThreadCount <= 1 && bReconstructSlice) {
if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
return iRet;
}
}
if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
- if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
- bAllRefComplete &= CheckRefPicturesComplete (pCtx);
- } else {
- bAllRefComplete = false;
+ if (iThreadCount <= 1) {
+ if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
+ bAllRefComplete &= CheckRefPicturesComplete (pCtx);
+ } else {
+ bAllRefComplete = false;
+ }
}
}
}
@@ -2626,53 +2833,80 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
}
}
+ if (iThreadCount >= 1) {
+ int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
+ for (int32_t i = 0; i < iThreadCount; ++i) {
+ if (i == id || pThreadCtx[i - id].pCtx->uiDecodingTimeStamp == 0) continue;
+ if (pThreadCtx[i - id].pCtx->uiDecodingTimeStamp < pCtx->uiDecodingTimeStamp) {
+ WAIT_EVENT (&pThreadCtx[i - id].sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE);
+ }
+ }
+ pCtx->pLastDecPicInfo->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
+ }
iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
- if (iRet)
+ if (iRet) {
+ if (iThreadCount > 1) {
+ SET_EVENT (&pThreadCtx->sSliceDecodeFinish);
+ }
return iRet;
+ }
- pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
- pCtx->bUsedAsRef = false;
- if (uiNalRefIdc > 0) {
- pCtx->bUsedAsRef = true;
- //save MBType, MV and RefIndex for use in B-Slice direct mode
- memcpy (pCtx->pDec->pMbType, pCtx->pCurDqLayer->pMbType, pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t));
- memcpy (pCtx->pDec->pMv[LIST_0], pCtx->pCurDqLayer->pMv[LIST_0],
- pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
- memcpy (pCtx->pDec->pMv[LIST_1], pCtx->pCurDqLayer->pMv[LIST_1],
- pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
- memcpy (pCtx->pDec->pRefIndex[LIST_0], pCtx->pCurDqLayer->pRefIndex[LIST_0],
- pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
- memcpy (pCtx->pDec->pRefIndex[LIST_1], pCtx->pCurDqLayer->pRefIndex[LIST_1],
- pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
- for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- for (uint32_t i = 0; i < pCtx->sRefPic.uiRefCount[listIdx]; ++i) {
- pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
+ pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
+ pCtx->bUsedAsRef = pCtx->uiNalRefIdc > 0;
+ if (iThreadCount <= 1) {
+ if (pCtx->bUsedAsRef) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ uint32_t i = 0;
+ while (i < MAX_DPB_COUNT && pCtx->sRefPic.pRefList[listIdx][i]) {
+ pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
+ ++i;
+ }
}
- }
- iRet = WelsMarkAsRef (pCtx);
- if (iRet != ERR_NONE) {
- if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
- pCtx->iErrorCode |= dsBitstreamError;
- if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
- pCtx->pDec = NULL;
- return iRet;
+ iRet = WelsMarkAsRef (pCtx);
+ if (iRet != ERR_NONE) {
+ if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
+ pCtx->iErrorCode |= dsBitstreamError;
+ if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+ pCtx->pDec = NULL;
+ return iRet;
+ }
}
+ if (!pCtx->pParam->bParseOnly)
+ ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
+ pCtx->pDec->iLinesize,
+ pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
}
- if (!pCtx->pParam->bParseOnly)
- ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
- pCtx->pDec->iLinesize,
- pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
+ } else if (iThreadCount > 1) {
+ SET_EVENT (&pThreadCtx->sImageReady);
}
pCtx->pDec = NULL; //after frame decoding, always set to NULL
}
// need update frame_num due current frame is well decoded
if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
- pCtx->iPrevFrameNum = pSh->iFrameNum;
- if (pCtx->bLastHasMmco5)
- pCtx->iPrevFrameNum = 0;
+ pCtx->pLastDecPicInfo->iPrevFrameNum = pSh->iFrameNum;
+ if (pCtx->pLastDecPicInfo->bLastHasMmco5)
+ pCtx->pLastDecPicInfo->iPrevFrameNum = 0;
+ if (iThreadCount > 1) {
+ int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
+ for (int32_t i = 0; i < iThreadCount; ++i) {
+ if (pThreadCtx[i - id].pCtx != NULL) {
+ unsigned long long uiTimeStamp = pThreadCtx[i - id].pCtx->uiTimeStamp;
+ if (uiTimeStamp > 0 && pThreadCtx[i - id].pCtx->sSpsPpsCtx.iSeqId > pCtx->sSpsPpsCtx.iSeqId) {
+ CopySpsPps (pThreadCtx[i - id].pCtx, pCtx);
+ if (pCtx->pPicBuff != pThreadCtx[i - id].pCtx->pPicBuff) {
+ pCtx->pPicBuff = pThreadCtx[i - id].pCtx->pPicBuff;
+ }
+ InitialDqLayersContext (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
+ break;
+ }
+ }
+ }
+ }
+ if (iThreadCount > 1) {
+ SET_EVENT (&pThreadCtx->sSliceDecodeFinish);
+ }
}
-
return ERR_NONE;
}
@@ -2682,7 +2916,8 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) { //VCL data, AU list should have data
PNalUnit pCurNal = pAu->pNalUnitsList[pAu->uiEndPos];
bAuBoundaryFlag = (pCtx->iTotalNumMbRec != 0)
- && (CheckAccessUnitBoundaryExt (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, &pCtx->sLastSliceHeader,
+ && (CheckAccessUnitBoundaryExt (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt,
+ &pCtx->pLastDecPicInfo->sLastSliceHeader,
&pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader));
} else { //non VCL
if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_AU_DELIMITER) {
@@ -2690,11 +2925,11 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SEI) {
bAuBoundaryFlag = true;
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SPS) {
- bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SPS);
+ bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS);
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SUBSET_SPS) {
- bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS);
+ bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS);
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_PPS) {
- bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_PPS);
+ bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS);
}
if (bAuBoundaryFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { //Construct remaining data first
ConstructAccessUnit (pCtx, ppDst, pDstInfo);
@@ -2710,16 +2945,20 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
- pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use
- if (pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
- MarkECFrameAsRef (pCtx);
+ pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use
+ if (pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
+ if (MarkECFrameAsRef (pCtx) == ERR_INFO_INVALID_PTR) {
+ pCtx->iErrorCode |= dsRefListNullPtrs;
+ return false;
+ }
}
} else if (pCtx->pParam->bParseOnly) { //clear parse only internal data status
pCtx->pParserBsInfo->iNalNum = 0;
pCtx->bFrameFinish = true; //clear frame pending status here!
} else {
if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) {
- if ((pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) && (pCtx->sLastNalHdrExt.uiTemporalId == 0))
+ if ((pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0)
+ && (pCtx->pLastDecPicInfo->sLastNalHdrExt.uiTemporalId == 0))
pCtx->iErrorCode |= dsNoParamSets;
else
pCtx->iErrorCode |= dsBitstreamError;
@@ -2729,9 +2968,9 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
}
pCtx->pDec = NULL;
if (pAu->pNalUnitsList[pAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
- pCtx->iPrevFrameNum = pCtx->sLastSliceHeader.iFrameNum; //save frame_num
- if (pCtx->bLastHasMmco5)
- pCtx->iPrevFrameNum = 0;
+ pCtx->pLastDecPicInfo->iPrevFrameNum = pCtx->pLastDecPicInfo->sLastSliceHeader.iFrameNum; //save frame_num
+ if (pCtx->pLastDecPicInfo->bLastHasMmco5)
+ pCtx->pLastDecPicInfo->iPrevFrameNum = 0;
}
return ERR_NONE;
}
@@ -2742,28 +2981,37 @@ bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
int32_t iRealMbIdx = pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
for (int32_t iMbIdx = 0; bAllRefComplete
&& iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) {
- switch (pCtx->pCurDqLayer->pMbType[iRealMbIdx]) {
+ switch (pCtx->pCurDqLayer->pDec->pMbType[iRealMbIdx]) {
case MB_TYPE_SKIP:
case MB_TYPE_16x16:
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
break;
case MB_TYPE_16x8:
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
break;
case MB_TYPE_8x16:
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
break;
case MB_TYPE_8x8:
case MB_TYPE_8x8_REF0:
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
- bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
+ bAllRefComplete &=
+ pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
break;
default:
@@ -2774,6 +3022,7 @@ bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb()
return false;
}
+
return bAllRefComplete;
}
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp
index b11f764028d..64da754693d 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp
@@ -83,7 +83,7 @@ void InitErrorCon (PWelsDecoderContext pCtx) {
//Do error concealment using frame copy method
void DoErrorConFrameCopy (PWelsDecoderContext pCtx) {
PPicture pDstPic = pCtx->pDec;
- PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb;
+ PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb;
uint32_t uiHeightInPixelY = (pCtx->pSps->iMbHeight) << 4;
int32_t iStrideY = pDstPic->iLinesize[0];
int32_t iStrideUV = pDstPic->iLinesize[1];
@@ -109,7 +109,7 @@ void DoErrorConSliceCopy (PWelsDecoderContext pCtx) {
int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth;
int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight;
PPicture pDstPic = pCtx->pDec;
- PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb;
+ PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb;
if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY) && (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag))
pSrcPic = NULL; //no cross IDR method, should fill in data instead of copy
@@ -245,7 +245,7 @@ void DoMbECMvCopy (PWelsDecoderContext pCtx, PPicture pDec, PPicture pRef, int32
}
iMVs[0] = iFullMVx - (iMbXInPix << 2);
iMVs[1] = iFullMVy - (iMbYInPix << 2);
- BaseMC (pMCRefMem, iMbXInPix, iMbYInPix, &pCtx->sMcFunc, 16, 16, iMVs);
+ BaseMC (pCtx, pMCRefMem, -1, -1, iMbXInPix, iMbYInPix, &pCtx->sMcFunc, 16, 16, iMVs);
}
return;
}
@@ -266,40 +266,40 @@ void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx) {
for (int32_t iMbY = 0; iMbY < iMbHeight; ++iMbY) {
for (int32_t iMbX = 0; iMbX < iMbWidth; ++iMbX) {
iMbXyIndex = iMbY * iMbWidth + iMbX;
- if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER (pCurDqLayer->pMbType[iMbXyIndex])) {
- uint32_t iMBType = pCurDqLayer->pMbType[iMbXyIndex];
+ if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER (pCurDqLayer->pDec->pMbType[iMbXyIndex])) {
+ uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMbXyIndex];
switch (iMBType) {
case MB_TYPE_SKIP:
case MB_TYPE_16x16:
- iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0];
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1];
+ iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
break;
case MB_TYPE_16x8:
- iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0];
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1];
+ iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
- iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][8];
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][8][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][8][1];
+ iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][8];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
break;
case MB_TYPE_8x16:
- iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0];
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1];
+ iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
- iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][2];
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][2][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][2][1];
+ iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][2];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
break;
@@ -311,39 +311,39 @@ void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx) {
for (i = 0; i < 4; i++) {
iSubMBType = pCurDqLayer->pSubMbType[iMbXyIndex][i];
iIIdx = ((i >> 1) << 3) + ((i & 1) << 1);
- iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][iIIdx];
+ iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][iIIdx];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
switch (iSubMBType) {
case SUB_MB_TYPE_8x8:
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1];
iInterMbCorrectNum[iRefIdx]++;
break;
case SUB_MB_TYPE_8x4:
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1];
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 4][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 4][1];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][1];
iInterMbCorrectNum[iRefIdx] += 2;
break;
case SUB_MB_TYPE_4x8:
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1];
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 1][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 1][1];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][1];
iInterMbCorrectNum[iRefIdx] += 2;
break;
case SUB_MB_TYPE_4x4: {
for (j = 0; j < 4; j++) {
iJIdx = ((j >> 1) << 2) + (j & 1);
- pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + iJIdx][0];
- pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + iJIdx][1];
+ pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][0];
+ pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][1];
}
iInterMbCorrectNum[iRefIdx] += 4;
}
@@ -372,7 +372,7 @@ void DoErrorConSliceMVCopy (PWelsDecoderContext pCtx) {
int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth;
int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight;
PPicture pDstPic = pCtx->pDec;
- PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb;
+ PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb;
bool* pMbCorrectlyDecodedFlag = pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag;
int32_t iMbXyIndex;
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp
index a40d2b09b81..571ce41d7d9 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp
@@ -42,6 +42,7 @@
#include "manage_dec_ref.h"
#include "error_concealment.h"
#include "error_code.h"
+#include "decoder.h"
namespace WelsDec {
@@ -50,10 +51,10 @@ static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameId
static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum);
static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx);
-static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking);
-static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
+static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking);
+static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic pRefPic, uint32_t uiMmcoType,
int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx);
-static int32_t SlidingWindow (PWelsDecoderContext pCtx);
+static int32_t SlidingWindow (PWelsDecoderContext pCtx, PRefPic pRefPic);
static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic);
static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx, uint32_t uiLongTermPicNum);
@@ -63,7 +64,7 @@ static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx);
#ifdef LONG_TERM_REF
int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum);
#endif
-static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx);
+static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic);
static void SetUnRef (PPicture pRef) {
if (NULL != pRef) {
@@ -79,6 +80,20 @@ static void SetUnRef (PPicture pRef) {
pRef->uiSpatialId = -1;
pRef->iSpsId = -1;
pRef->bIsComplete = false;
+ pRef->iRefCount = 0;
+
+ if (pRef->eSliceType == I_SLICE) {
+ return;
+ }
+ int32_t lists = pRef->eSliceType == P_SLICE ? 1 : 2;
+ for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) {
+ for (int32_t list = 0; list < lists; ++list) {
+ if (pRef->pRefPic[list][i] != NULL) {
+ pRef->pRefPic[list][i]->iRefCount = 0;
+ pRef->pRefPic[list][i] = NULL;
+ }
+ }
+ }
}
}
@@ -111,9 +126,29 @@ void WelsResetRefPic (PWelsDecoderContext pCtx) {
pRefPic->uiLongRefCount[LIST_0] = 0;
}
+void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx) { // Empties the pCtx->sRefPic bookkeeping: only counters and list pointers are cleared, no picture field is written.
+ int32_t i = 0;
+ PRefPic pRefPic = &pCtx->sRefPic; // alias: every write below lands in pCtx->sRefPic
+ pCtx->sRefPic.uiLongRefCount[LIST_0] = pCtx->sRefPic.uiShortRefCount[LIST_0] = 0; // the same two counters are zeroed again after each loop below
+
+ pRefPic->uiRefCount[LIST_0] = 0; // only the counts are reset; the pRefList entries themselves are left untouched
+ pRefPic->uiRefCount[LIST_1] = 0;
+
+ for (i = 0; i < MAX_DPB_COUNT; i++) {
+ pRefPic->pShortRefList[LIST_0][i] = NULL; // forget the short-term entry; the pointed-to picture is not modified
+ }
+ pRefPic->uiShortRefCount[LIST_0] = 0;
+
+ for (i = 0; i < MAX_DPB_COUNT; i++) {
+ pRefPic->pLongRefList[LIST_0][i] = NULL; // forget the long-term entry; the pointed-to picture is not modified
+ }
+ pRefPic->uiLongRefCount[LIST_0] = 0;
+}
+
static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) {
- if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0) && (pCtx->eSliceType != I_SLICE
- && pCtx->eSliceType != SI_SLICE)) {
+ if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0)
+ && (pCtx->eSliceType != I_SLICE
+ && pCtx->eSliceType != SI_SLICE)) {
if (pCtx->pParam->eEcActiveIdc !=
ERROR_CON_DISABLE) { //IDR lost!, recover it for future decoding with data all set to 0
PPicture pRef = PrefetchPic (pCtx->pPicBuff);
@@ -122,30 +157,43 @@ static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) {
pRef->bIsComplete = false; // Set complete flag to false for lost IDR ref picture
pRef->iSpsId = pCtx->pSps->iSpsId;
pRef->iPpsId = pCtx->pPps->iPpsId;
+ if (pCtx->eSliceType == B_SLICE) {
+ //reset reference's references when IDR is lost
+ for (int32_t list = LIST_0; list < LIST_A; ++list) {
+ for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) {
+ pRef->pRefPic[list][i] = NULL;
+ }
+ }
+ }
pCtx->iErrorCode |= dsDataErrorConcealed;
bool bCopyPrevious = ((ERROR_CON_FRAME_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
|| (ERROR_CON_SLICE_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
|| (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc))
- && (NULL != pCtx->pPreviousDecodedPictureInDpb);
- bCopyPrevious = bCopyPrevious && (pRef->iWidthInPixel == pCtx->pPreviousDecodedPictureInDpb->iWidthInPixel)
- && (pRef->iHeightInPixel == pCtx->pPreviousDecodedPictureInDpb->iHeightInPixel);
+ && (NULL != pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb);
+ bCopyPrevious = bCopyPrevious
+ && (pRef->iWidthInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iWidthInPixel)
+ && (pRef->iHeightInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iHeightInPixel);
if (!bCopyPrevious) {
memset (pRef->pData[0], 128, pRef->iLinesize[0] * pRef->iHeightInPixel);
memset (pRef->pData[1], 128, pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
memset (pRef->pData[2], 128, pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
- } else if (pRef == pCtx->pPreviousDecodedPictureInDpb) {
+ } else if (pRef == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsInitRefList()::EC memcpy overlap.");
} else {
- memcpy (pRef->pData[0], pCtx->pPreviousDecodedPictureInDpb->pData[0], pRef->iLinesize[0] * pRef->iHeightInPixel);
- memcpy (pRef->pData[1], pCtx->pPreviousDecodedPictureInDpb->pData[1], pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
- memcpy (pRef->pData[2], pCtx->pPreviousDecodedPictureInDpb->pData[2], pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
+ memcpy (pRef->pData[0], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[0],
+ pRef->iLinesize[0] * pRef->iHeightInPixel);
+ memcpy (pRef->pData[1], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[1],
+ pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
+ memcpy (pRef->pData[2], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[2],
+ pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
}
pRef->iFrameNum = 0;
pRef->iFramePoc = 0;
pRef->uiTemporalId = pRef->uiQualityId = 0;
+ pRef->eSliceType = pCtx->eSliceType;
ExpandReferencingPicture (pRef->pData, pRef->iWidthInPixel, pRef->iHeightInPixel, pRef->iLinesize,
pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
AddShortTermToList (&pCtx->sRefPic, pRef);
@@ -346,8 +394,11 @@ int32_t WelsReorderRefList (PWelsDecoderContext pCtx) {
for (int32_t listIdx = 0; listIdx < ListCount; ++listIdx) {
PPicture pPic = NULL;
PPicture* ppRefList = pCtx->sRefPic.pRefList[listIdx];
- int32_t iMaxRefIdx = pCtx->pSps->iNumRefFrames;
- int32_t iRefCount = pCtx->sRefPic.uiRefCount[listIdx];
+ int32_t iMaxRefIdx = pCtx->iPicQueueNumber;
+ if (iMaxRefIdx >= MAX_REF_PIC_COUNT) {
+ iMaxRefIdx = MAX_REF_PIC_COUNT - 1;
+ }
+ int32_t iRefCount = pSliceHeader->uiRefCount[listIdx];
int32_t iPredFrameNum = pSliceHeader->iFrameNum;
int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum;
int32_t iAbsDiffPicNum = -1;
@@ -520,13 +571,20 @@ int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx) {
for (i = WELS_MAX (1, WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx])); i < iRefCount; i++)
ppRefList[i] = ppRefList[i - 1];
- pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]), iRefCount);
+ pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]),
+ iRefCount);
}
return ERR_NONE;
}
-int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
- PRefPic pRefPic = &pCtx->sRefPic;
+int32_t WelsMarkAsRef (PWelsDecoderContext pCtx, PPicture pLastDec) {
+ PPicture pDec = pLastDec;
+ bool isThreadCtx = true;
+ if (pDec == NULL) {
+ pDec = pCtx->pDec;
+ isThreadCtx = false;
+ }
+ PRefPic pRefPic = isThreadCtx ? &pCtx->sTmpRefPic : &pCtx->sRefPic;
PRefPicMarking pRefPicMarking = pCtx->pCurDqLayer->pRefPicMarking;
PAccessUnit pCurAU = pCtx->pAccessUnitList;
bool bIsIDRAU = false;
@@ -534,10 +592,10 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
int32_t iRet = ERR_NONE;
- pCtx->pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
- pCtx->pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId;
- pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
- pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
+ pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
+ pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId;
+ pDec->iSpsId = pCtx->pSps->iSpsId;
+ pDec->iPpsId = pCtx->pPps->iPpsId;
for (j = pCurAU->uiStartPos; j <= pCurAU->uiEndPos; j++) {
if (pCurAU->pNalUnitsList[j]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR
@@ -548,33 +606,33 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
}
if (bIsIDRAU) {
if (pRefPicMarking->bLongTermRefFlag) {
- pCtx->sRefPic.iMaxLongTermFrameIdx = 0;
- AddLongTermToList (pRefPic, pCtx->pDec, 0, 0);
+ pRefPic->iMaxLongTermFrameIdx = 0;
+ AddLongTermToList (pRefPic, pDec, 0, 0);
} else {
- pCtx->sRefPic.iMaxLongTermFrameIdx = -1;
+ pRefPic->iMaxLongTermFrameIdx = -1;
}
} else {
if (pRefPicMarking->bAdaptiveRefPicMarkingModeFlag) {
- iRet = MMCO (pCtx, pRefPicMarking);
+ iRet = MMCO (pCtx, pRefPic, pRefPicMarking);
if (iRet != ERR_NONE) {
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
- iRet = RemainOneBufferInDpbForEC (pCtx);
+ iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic);
WELS_VERIFY_RETURN_IF (iRet, iRet);
} else {
return iRet;
}
}
- if (pCtx->bLastHasMmco5) {
- pCtx->pDec->iFrameNum = 0;
- pCtx->pDec->iFramePoc = 0;
+ if (pCtx->pLastDecPicInfo->bLastHasMmco5) {
+ pDec->iFrameNum = 0;
+ pDec->iFramePoc = 0;
}
} else {
- iRet = SlidingWindow (pCtx);
+ iRet = SlidingWindow (pCtx, pRefPic);
if (iRet != ERR_NONE) {
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
- iRet = RemainOneBufferInDpbForEC (pCtx);
+ iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic);
WELS_VERIFY_RETURN_IF (iRet, iRet);
} else {
return iRet;
@@ -583,22 +641,22 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
}
}
- if (!pCtx->pDec->bIsLongRef) {
+ if (!pDec->bIsLongRef) {
if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) {
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
- iRet = RemainOneBufferInDpbForEC (pCtx);
+ iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic);
WELS_VERIFY_RETURN_IF (iRet, iRet);
} else {
return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
}
}
- iRet = AddShortTermToList (pRefPic, pCtx->pDec);
+ iRet = AddShortTermToList (pRefPic, pDec);
}
return iRet;
}
-static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
+static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking) {
PSps pSps = pCtx->pCurDqLayer->sLayerInfo.pSps;
int32_t i = 0;
int32_t iRet = ERR_NONE;
@@ -612,7 +670,8 @@ static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
if (uiMmcoType > MMCO_LONG) {
return ERR_INFO_INVALID_MMCO_OPCODE_BASE;
}
- iRet = MMCOProcess (pCtx, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx, iMaxLongTermFrameIdx);
+ iRet = MMCOProcess (pCtx, pRefPic, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx,
+ iMaxLongTermFrameIdx);
if (iRet != ERR_NONE) {
return iRet;
}
@@ -623,9 +682,8 @@ static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
return ERR_NONE;
}
-static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
+static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic pRefPic, uint32_t uiMmcoType,
int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx) {
- PRefPic pRefPic = &pCtx->sRefPic;
PPicture pPic = NULL;
int32_t i = 0;
int32_t iRet = ERR_NONE;
@@ -672,7 +730,7 @@ static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
break;
case MMCO_RESET:
WelsResetRefPic (pCtx);
- pCtx->bLastHasMmco5 = true;
+ pCtx->pLastDecPicInfo->bLastHasMmco5 = true;
break;
case MMCO_LONG:
if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
@@ -697,13 +755,12 @@ static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
return iRet;
}
-static int32_t SlidingWindow (PWelsDecoderContext pCtx) {
- PRefPic pRefPic = &pCtx->sRefPic;
+static int32_t SlidingWindow (PWelsDecoderContext pCtx, PRefPic pRefPic) {
PPicture pPic = NULL;
int32_t i = 0;
- if (pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) {
- if (pCtx->sRefPic.uiShortRefCount[LIST_0] == 0) {
+ if (pRefPic->uiShortRefCount[LIST_0] + pRefPic->uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) {
+ if (pRefPic->uiShortRefCount[LIST_0] == 0) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "No reference picture in short term list when sliding window");
return ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH;
}
@@ -728,8 +785,8 @@ static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) {
for (i = 0; i < pRefPic->uiShortRefCount[LIST_0]; i++) {
if (pRefPic->pShortRefList[LIST_0][i]->iFrameNum == iFrameNum) {
iMoveSize = pRefPic->uiShortRefCount[LIST_0] - i - 1;
- pRefPic->pShortRefList[LIST_0][i]->bUsedAsRef = false;
pPic = pRefPic->pShortRefList[LIST_0][i];
+ pPic->bUsedAsRef = false;
pRefPic->pShortRefList[LIST_0][i] = NULL;
if (iMoveSize > 0) {
memmove (&pRefPic->pShortRefList[LIST_0][i], &pRefPic->pShortRefList[LIST_0][i + 1],
@@ -740,7 +797,6 @@ static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) {
break;
}
}
-
return pPic;
}
@@ -788,6 +844,9 @@ static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic) {
if (pRefPic->uiShortRefCount[LIST_0] > 0) {
// Check the duplicate frame_num in short ref list
for (int32_t iPos = 0; iPos < pRefPic->uiShortRefCount[LIST_0]; iPos++) {
+ if (!pRefPic->pShortRefList[LIST_0][iPos]) {
+ return ERR_INFO_INVALID_PTR;
+ }
if (pPic->iFrameNum == pRefPic->pShortRefList[LIST_0][iPos]->iFrameNum) {
// Replace the previous ref pic with the new one with the same frame_num
pRefPic->pShortRefList[LIST_0][iPos] = pPic;
@@ -815,6 +874,9 @@ static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongT
pRefPic->pLongRefList[LIST_0][pRefPic->uiLongRefCount[LIST_0]] = pPic;
} else {
for (i = 0; i < pRefPic->uiLongRefCount[LIST_0]; i++) {
+ if (!pRefPic->pLongRefList[LIST_0][i]) {
+ return ERR_INFO_INVALID_PTR;
+ }
if (pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx > pPic->iLongTermFrameIdx) {
break;
}
@@ -860,14 +922,13 @@ int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum) {
}
#endif
-static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx) {
+static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic) {
int32_t iRet = ERR_NONE;
- PRefPic pRefPic = &pCtx->sRefPic;
if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] < pCtx->pSps->iNumRefFrames)
return iRet;
if (pRefPic->uiShortRefCount[0] > 0) {
- iRet = SlidingWindow (pCtx);
+ iRet = SlidingWindow (pCtx, pRefPic);
} else { //all LTR, remove the smallest long_term_frame_idx
int32_t iLongTermFrameIdx = 0;
int32_t iMaxLongTermFrameIdx = pRefPic->iMaxLongTermFrameIdx;
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp
index e180bf132bd..642a982d915 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp
@@ -155,7 +155,7 @@ void CopyRectBlock4Cols (void* vdst, void* vsrc, const int32_t stride_dst, const
memcpy (&dst[stride_dst * 3], &src[stride_src * 3], 16);
}
}
-void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
+void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]) {
bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
@@ -170,14 +170,14 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
int8_t iMatchRef;
int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2];
- iCurXy = pCurLayer->iMbXyIndex;
- iCurX = pCurLayer->iMbX;
- iCurY = pCurLayer->iMbY;
- iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];
+ iCurXy = pCurDqLayer->iMbXyIndex;
+ iCurX = pCurDqLayer->iMbX;
+ iCurY = pCurDqLayer->iMbY;
+ iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
if (iCurX != 0) {
iLeftXy = iCurXy - 1;
- iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
+ iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
} else {
bLeftAvail = 0;
@@ -185,19 +185,19 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
}
if (iCurY != 0) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
- iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
+ iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
bTopAvail = (iTopSliceIdc == iCurSliceIdc);
if (iCurX != 0) {
iLeftTopXy = iTopXy - 1;
- iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
+ iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
} else {
bLeftTopAvail = 0;
}
- if (iCurX != (pCurLayer->iMbWidth - 1)) {
+ if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
iRightTopXy = iTopXy + 1;
- iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
+ iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
} else {
bRightTopAvail = 0;
@@ -208,18 +208,18 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
bRightTopAvail = 0;
}
- iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0);
- iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0);
+ iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
+ iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
- ? pCurLayer->pMbType[iLeftTopXy] : 0);
- iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
- ? pCurLayer->pMbType[iRightTopXy] : 0);
+ ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
+ iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
+ ? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
/*get neb mv&iRefIdxArray*/
/*left*/
if (bLeftAvail && IS_INTER (iLeftType)) {
- ST32 (iMvA, LD32 (pCurLayer->pMv[0][iLeftXy][3]));
- iLeftRef = pCurLayer->pRefIndex[0][iLeftXy][3];
+ ST32 (iMvA, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftXy][3] : pCurDqLayer->pMv[0][iLeftXy][3]));
+ iLeftRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftXy][3] : pCurDqLayer->pRefIndex[0][iLeftXy][3];
} else {
ST32 (iMvA, 0);
if (0 == bLeftAvail) { //not available
@@ -236,8 +236,8 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
/*top*/
if (bTopAvail && IS_INTER (iTopType)) {
- ST32 (iMvB, LD32 (pCurLayer->pMv[0][iTopXy][12]));
- iTopRef = pCurLayer->pRefIndex[0][iTopXy][12];
+ ST32 (iMvB, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iTopXy][12] : pCurDqLayer->pMv[0][iTopXy][12]));
+ iTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iTopXy][12] : pCurDqLayer->pRefIndex[0][iTopXy][12];
} else {
ST32 (iMvB, 0);
if (0 == bTopAvail) { //not available
@@ -254,8 +254,10 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
/*right_top*/
if (bRightTopAvail && IS_INTER (iRightTopType)) {
- ST32 (iMvC, LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
- iRightTopRef = pCurLayer->pRefIndex[0][iRightTopXy][12];
+ ST32 (iMvC, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iRightTopXy][12] :
+ pCurDqLayer->pMv[0][iRightTopXy][12]));
+ iRightTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iRightTopXy][12] :
+ pCurDqLayer->pRefIndex[0][iRightTopXy][12];
} else {
ST32 (iMvC, 0);
if (0 == bRightTopAvail) { //not available
@@ -267,8 +269,9 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
/*left_top*/
if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
- ST32 (iMvD, LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
- iLeftTopRef = pCurLayer->pRefIndex[0][iLeftTopXy][15];
+ ST32 (iMvD, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftTopXy][15] : pCurDqLayer->pMv[0][iLeftTopXy][15]));
+ iLeftTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftTopXy][15] :
+ pCurDqLayer->pRefIndex[0][iLeftTopXy][15];
} else {
ST32 (iMvD, 0);
if (0 == bLeftTopAvail) { //not available
@@ -305,13 +308,21 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
}
int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType) {
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
- uint32_t is8x8 = IS_Inter_8x8 (pCurLayer->pMbType[iMbXy]);
- mbType = pCurLayer->pMbType[iMbXy];
+ uint32_t is8x8 = IS_Inter_8x8 (GetMbType (pCurDqLayer)[iMbXy]);
+ mbType = GetMbType (pCurDqLayer)[iMbXy];
PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0];
+ if (GetThreadCount (pCtx) > 1 && colocPic != NULL) { // colocPic is only NULL-checked after this block, so guard the dereferences below
+ if (16 * pCurDqLayer->iMbY > pCtx->lastReadyHeightOffset[1][0]) { // rows at or below the recorded offset skip the wait
+ if (colocPic->pReadyEvent[pCurDqLayer->iMbY].isSignaled != 1) {
+ WAIT_EVENT (&colocPic->pReadyEvent[pCurDqLayer->iMbY], WELS_DEC_THREAD_WAIT_INFINITE);
+ }
+ pCtx->lastReadyHeightOffset[1][0] = 16 * pCurDqLayer->iMbY; // record this row's offset so later MBs in it do not wait again
+ }
+ } // a NULL colocPic now falls through to the existing error path that follows
if (colocPic == NULL) {
SLogContext* pLogCtx = & (pCtx->sLogCtx);
@@ -320,7 +331,10 @@ int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& sub
}
MbType coloc_mbType = colocPic->pMbType[iMbXy];
-
+ if (coloc_mbType == MB_TYPE_SKIP) {
+ //This indicates the colocated MB is P SKIP MB
+ coloc_mbType |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0;
+ }
if (IS_Inter_8x8 (coloc_mbType) && !pCtx->pSps->bDirect8x8InferenceFlag) {
subMbType = SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1;
@@ -333,43 +347,43 @@ int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& sub
}
if (IS_INTRA (coloc_mbType)) {
- SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t));
+ SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t));
return ERR_NONE;
}
- SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t));
+ SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t));
if (IS_INTER_16x16 (mbType)) {
int16_t iMVZero[2] = { 0 };
int16_t* pMv = IS_TYPE_L1 (coloc_mbType) ? colocPic->pMv[LIST_1][iMbXy][0] : iMVZero;
- ST32 (pCurLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0]));
- ST32 (pCurLayer->iColocMv[LIST_1][0], LD32 (pMv));
- pCurLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0];
- pCurLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] :
- REF_NOT_IN_LIST;
+ ST32 (pCurDqLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0]));
+ ST32 (pCurDqLayer->iColocMv[LIST_1][0], LD32 (pMv));
+ pCurDqLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0];
+ pCurDqLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] :
+ REF_NOT_IN_LIST;
} else {
if (!pCtx->pSps->bDirect8x8InferenceFlag) {
- CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4);
- CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1);
+ CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4);
+ CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1);
if (IS_TYPE_L1 (coloc_mbType)) {
- CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4);
- CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1);
+ CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4);
+ CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1);
} else { // only forward prediction
- SetRectBlock (pCurLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
+ SetRectBlock (pCurDqLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
}
} else {
for (int32_t listIdx = 0; listIdx < 1 + !! (coloc_mbType & MB_TYPE_L1); listIdx++) {
- SetRectBlock (pCurLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4);
- SetRectBlock (pCurLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4);
- SetRectBlock (pCurLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4);
- SetRectBlock (pCurLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4);
-
- SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1);
- SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1);
- SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1);
- SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1);
+ SetRectBlock (pCurDqLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4);
+ SetRectBlock (pCurDqLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4);
+ SetRectBlock (pCurDqLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4);
+ SetRectBlock (pCurDqLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4);
+
+ SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1);
+ SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1);
+ SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1);
+ SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1);
}
if (! (coloc_mbType & MB_TYPE_L1)) // only forward prediction
- SetRectBlock (&pCurLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
+ SetRectBlock (&pCurDqLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
}
}
return ERR_NONE;
@@ -379,9 +393,9 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
SubMbType& subMbType) {
int32_t ret = ERR_NONE;
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
- bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
MbType mbType;
ret = GetColocatedMb (pCtx, mbType, subMbType);
@@ -401,15 +415,15 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
int8_t iDiagonalRef[LIST_A];
int16_t iMvA[LIST_A][2], iMvB[LIST_A][2], iMvC[LIST_A][2], iMvD[LIST_A][2];
- iCurXy = pCurLayer->iMbXyIndex;
+ iCurXy = pCurDqLayer->iMbXyIndex;
- iCurX = pCurLayer->iMbX;
- iCurY = pCurLayer->iMbY;
- iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];
+ iCurX = pCurDqLayer->iMbX;
+ iCurY = pCurDqLayer->iMbY;
+ iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
if (iCurX != 0) {
iLeftXy = iCurXy - 1;
- iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
+ iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
} else {
bLeftAvail = 0;
@@ -417,19 +431,19 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
}
if (iCurY != 0) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
- iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
+ iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
bTopAvail = (iTopSliceIdc == iCurSliceIdc);
if (iCurX != 0) {
iLeftTopXy = iTopXy - 1;
- iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
+ iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
} else {
bLeftTopAvail = 0;
}
- if (iCurX != (pCurLayer->iMbWidth - 1)) {
+ if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
iRightTopXy = iTopXy + 1;
- iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
+ iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
} else {
bRightTopAvail = 0;
@@ -440,20 +454,22 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
bRightTopAvail = 0;
}
- iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0);
- iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0);
+ iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
+ iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
- ? pCurLayer->pMbType[iLeftTopXy] : 0);
- iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
- ? pCurLayer->pMbType[iRightTopXy] : 0);
+ ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
+ iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
+ ? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
/*get neb mv&iRefIdxArray*/
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
/*left*/
if (bLeftAvail && IS_INTER (iLeftType)) {
- ST32 (iMvA[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
- iLeftRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
+ ST32 (iMvA[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3] :
+ pCurDqLayer->pMv[listIdx][iLeftXy][3]));
+ iLeftRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3] :
+ pCurDqLayer->pRefIndex[listIdx][iLeftXy][3];
} else {
ST32 (iMvA[listIdx], 0);
if (0 == bLeftAvail) { //not available
@@ -465,8 +481,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
/*top*/
if (bTopAvail && IS_INTER (iTopType)) {
- ST32 (iMvB[listIdx], LD32 (pCurLayer->pMv[listIdx][iTopXy][12]));
- iTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iTopXy][12];
+ ST32 (iMvB[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iTopXy][12] :
+ pCurDqLayer->pMv[listIdx][iTopXy][12]));
+ iTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12] :
+ pCurDqLayer->pRefIndex[listIdx][iTopXy][12];
} else {
ST32 (iMvB[listIdx], 0);
if (0 == bTopAvail) { //not available
@@ -478,8 +496,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
/*right_top*/
if (bRightTopAvail && IS_INTER (iRightTopType)) {
- ST32 (iMvC[listIdx], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
- iRightTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
+ ST32 (iMvC[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12] :
+ pCurDqLayer->pMv[listIdx][iRightTopXy][12]));
+ iRightTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12] :
+ pCurDqLayer->pRefIndex[listIdx][iRightTopXy][12];
} else {
ST32 (iMvC[listIdx], 0);
if (0 == bRightTopAvail) { //not available
@@ -490,8 +510,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
}
/*left_top*/
if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
- ST32 (iMvD[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
- iLeftTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
+ ST32 (iMvD[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15] :
+ pCurDqLayer->pMv[listIdx][iLeftTopXy][15]));
+ iLeftTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15] :
+ pCurDqLayer->pRefIndex[listIdx][iLeftTopXy][15];
} else {
ST32 (iMvD[listIdx], 0);
if (0 == bLeftTopAvail) { //not available
@@ -540,7 +562,7 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
mbType &= ~MB_TYPE_L0;
subMbType &= ~MB_TYPE_L0;
}
- pCurLayer->pMbType[iMbXy] = mbType;
+ GetMbType (pCurDqLayer)[iMbXy] = mbType;
int16_t pMvd[4] = { 0 };
@@ -548,31 +570,31 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
if (IS_INTER_16x16 (mbType)) {
if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) {
- if (0 == pCurLayer->iColocIntra[0] && !bIsLongRef
- && ((pCurLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurLayer->iColocMv[LIST_0][0][0] + 1) <= 2
- && (unsigned) (pCurLayer->iColocMv[LIST_0][0][1] + 1) <= 2)
- || (pCurLayer->iColocRefIndex[LIST_0][0] < 0 && pCurLayer->iColocRefIndex[LIST_1][0] == 0
- && (unsigned) (pCurLayer->iColocMv[LIST_1][0][0] + 1) <= 2
- && (unsigned) (pCurLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) {
+ if (0 == pCurDqLayer->iColocIntra[0] && !bIsLongRef
+ && ((pCurDqLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][0] + 1) <= 2
+ && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][1] + 1) <= 2)
+ || (pCurDqLayer->iColocRefIndex[LIST_0][0] < 0 && pCurDqLayer->iColocRefIndex[LIST_1][0] == 0
+ && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][0] + 1) <= 2
+ && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) {
if (0 >= ref[0]) * (uint32_t*)iMvp[LIST_0] = 0;
if (0 >= ref[1]) * (uint32_t*)iMvp[LIST_1] = 0;
}
}
- UpdateP16x16DirectCabac (pCurLayer);
+ UpdateP16x16DirectCabac (pCurDqLayer);
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- UpdateP16x16MotionInfo (pCurLayer, listIdx, ref[listIdx], iMvp[listIdx]);
- UpdateP16x16MvdCabac (pCurLayer, pMvd, listIdx);
+ UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref[listIdx], iMvp[listIdx]);
+ UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx);
}
} else {
if (bSkipOrDirect) {
int8_t pSubPartCount[4], pPartW[4];
for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
int16_t iIdx8 = i << 2;
- pCurLayer->pSubMbType[iMbXy][i] = subMbType;
+ pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
int8_t pRefIndex[LIST_A][30];
- UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
- UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
- UpdateP8x8DirectCabac (pCurLayer, iIdx8);
+ UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
+ UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
+ UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
@@ -581,137 +603,90 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
pSubPartCount[i] = 4;
pPartW[i] = 1;
}
-
- int8_t iPartCount = pSubPartCount[i];
- int16_t iPartIdx, iBlockW = pPartW[i];
-
- for (int32_t j = 0; j < iPartCount; j++) {
- iPartIdx = iIdx8 + j * iBlockW;
- uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
- uint8_t iColocIdx = g_kuiScan4[iPartIdx];
- //uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-
- int16_t pMV[4] = { 0 };
- if (IS_SUB_8x8 (subMbType)) {
- * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_0];
- ST32 ((pMV + 2), LD32 (pMV));
- ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
- * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_1];
- ST32 ((pMV + 2), LD32 (pMV));
- ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
- } else { //SUB_4x4
- * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_0];
- ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
- ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_1];
- ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
- ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- }
- if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) {
- uint32_t uiColZeroFlag = (0 == pCurLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
- (pCurLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
- && pCurLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
- const int16_t (*mvColoc)[2] = 0 == pCurLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurLayer->iColocMv[LIST_0] :
- pCurLayer->iColocMv[LIST_1];
- const int16_t* mv = mvColoc[iColocIdx];
- if (IS_SUB_8x8 (subMbType)) {
- if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
- if (ref[LIST_0] == 0) {
- ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
- ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
- }
-
- if (ref[LIST_1] == 0) {
- ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
- ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
- }
- }
- } else {
- if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
- if (ref[LIST_0] == 0) {
- ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
- ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- }
- if (ref[LIST_1] == 0) {
- ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
- ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- }
- }
- }
- }
- }
+ FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, bIsLongRef, iMvp, ref, NULL, NULL);
}
}
}
return ret;
}
-int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]) {
+int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A],
+ SubMbType& subMbType) {
int32_t ret = ERR_NONE;
- PDqLayer pCurLayer = pCtx->pCurDqLayer;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
- bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0;
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
+
MbType mbType;
- SubMbType subMbType;
ret = GetColocatedMb (pCtx, mbType, subMbType);
if (ret != ERR_NONE) {
return ret;
}
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+
+ GetMbType (pCurDqLayer)[iMbXy] = mbType;
+
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+ PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+ int16_t pMvd[4] = { 0 };
+ const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]);
if (IS_INTER_16x16 (mbType)) {
ref[LIST_0] = 0;
ref[LIST_1] = 0;
- UpdateP16x16RefIdx (pCurLayer, LIST_1, ref[LIST_1]);
+ UpdateP16x16DirectCabac (pCurDqLayer);
+ UpdateP16x16RefIdx (pCurDqLayer, LIST_1, ref[LIST_1]);
ST64 (iMvp, 0);
- if (pCurLayer->iColocIntra[0]) {
- UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]);
- UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]);
- UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]);
+ if (pCurDqLayer->iColocIntra[0]) {
+ UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
+ UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
+ UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
} else {
- ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_0][0] >= 0 ? pCurLayer->iColocRefIndex[LIST_0][0] :
- pCurLayer->iColocRefIndex[LIST_1][0];
- const int16_t (*mvColoc)[2] = 0 == ref[LIST_0] ? pCurLayer->iColocMv[LIST_0] : pCurLayer->iColocMv[LIST_1];
- const int16_t* mv = mvColoc[0];
- UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]);
+ ref[LIST_0] = 0;
+ int16_t* mv = pCurDqLayer->iColocMv[LIST_0][0];
+ int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][0];
+ if (colocRefIndexL0 >= 0) {
+ ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
+ } else {
+ mv = pCurDqLayer->iColocMv[LIST_1][0];
+ }
+ UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
- UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]);
+ UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0];
iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
- UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]);
+ UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
}
+ UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0);
+ UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_1);
} else {
if (bSkipOrDirect) {
int8_t pSubPartCount[4], pPartW[4];
+ int8_t pRefIndex[LIST_A][30];
for (int32_t i = 0; i < 4; i++) {
int16_t iIdx8 = i << 2;
- pCurLayer->pSubMbType[iMbXy][i] = subMbType;
+ const uint8_t iScan4Idx = g_kuiScan4[iIdx8];
+ pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
+
+ int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
ref[LIST_1] = 0;
- if (pCurLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+ UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
+ if (pCurDqLayer->iColocIntra[iScan4Idx]) {
ref[LIST_0] = 0;
+ UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
+ ST64 (iMvp, 0);
} else {
- if (pCurLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
- ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_0][iIdx8];
+ ref[LIST_0] = 0;
+ int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][iScan4Idx];
+ if (colocRefIndexL0 >= 0) {
+ ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
} else {
- ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_1][iIdx8];
+ mvColoc = pCurDqLayer->iColocMv[LIST_1];
}
+ UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
}
- int8_t pRefIndex[LIST_A][30];
- UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
- UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
- UpdateP8x8DirectCabac (pCurLayer, iIdx8);
+ UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
@@ -720,46 +695,7 @@ int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
pSubPartCount[i] = 4;
pPartW[i] = 1;
}
-
- int8_t iPartCount = pSubPartCount[i];
- int16_t iPartIdx, iBlockW = pPartW[i];
- for (int32_t j = 0; j < iPartCount; j++) {
- iPartIdx = iIdx8 + j * iBlockW;
- uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
- uint8_t iColocIdx = g_kuiScan4[iPartIdx];
-
- int16_t (*mvColoc)[2] = pCurLayer->iColocMv[LIST_0];
- int16_t* mv = mvColoc[iColocIdx];
-
- int16_t pMV[4] = { 0 };
- if (IS_SUB_8x8 (subMbType)) {
- iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
- iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
- ST32 (pMV, LD32 (iMvp[LIST_0]));
- ST32 ((pMV + 2), LD32 (iMvp[LIST_0]));
- ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
- iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0];
- iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1];
- ST32 (pMV, LD32 (iMvp[LIST_1]));
- ST32 ((pMV + 2), LD32 (iMvp[LIST_1]));
- ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
- } else { //SUB_4x4
- iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
- iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
- ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (iMvp[LIST_0]));
- ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0];
- iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1];
- ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (iMvp[LIST_1]));
- ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- }
- }
+ FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, ref, mvColoc, NULL, NULL);
}
}
}
@@ -868,14 +804,23 @@ void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef,
//mb
const uint8_t kuiScan4Idx = g_kuiScan4[i];
const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
+ if (pCurDqLayer->pDec != NULL) {
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ } else {
+ ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+ ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
-
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ }
}
}
@@ -891,8 +836,8 @@ void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef) {
const uint8_t kuiScan4Idx = g_kuiScan4[i];
const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
}
}
@@ -907,11 +852,17 @@ void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs
//mb
const uint8_t kuiScan4Idx = g_kuiScan4[i];
const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
-
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ if (pCurDqLayer->pDec != NULL) {
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ } else {
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ }
}
}
@@ -931,12 +882,21 @@ void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
//mb
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ if (pCurDqLayer->pDec != NULL) {
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ } else {
+ ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+ ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ }
//cache
ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
@@ -962,12 +922,21 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
//mb
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
- ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32);
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ if (pCurDqLayer->pDec != NULL) {
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+ ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ } else {
+ ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+ ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+ ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+ }
//cache
ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
@@ -978,4 +947,237 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
}
}
+void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
+ const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A],
+ int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]) {
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ for (int32_t j = 0; j < iPartCount; j++) {
+ int8_t iPartIdx = iIdx8 + j * iPartW;
+ uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+ uint8_t iColocIdx = g_kuiScan4[iPartIdx];
+ uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+
+ int16_t pMV[4] = { 0 };
+ if (IS_SUB_8x8 (subMbType)) {
+ * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
+ ST32 ((pMV + 2), LD32 (pMV));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+ ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+ if (pMotionVector != NULL) {
+ ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
+ ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
+ }
+ if (pMvdCache != NULL) {
+ ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
+ ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
+ }
+ * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
+ ST32 ((pMV + 2), LD32 (pMV));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+ ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+ if (pMotionVector != NULL) {
+ ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
+ ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
+ }
+ if (pMvdCache != NULL) {
+ ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
+ ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
+ }
+ } else { //SUB_4x4
+ * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
+ ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
+ ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+ if (pMotionVector != NULL) {
+ ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV));
+ }
+ if (pMvdCache != NULL) {
+ ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
+ }
+ * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
+ ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
+ ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+ if (pMotionVector != NULL) {
+ ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV));
+ }
+ if (pMvdCache != NULL) {
+ ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
+ }
+ }
+ if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) {
+ uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
+ (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
+ && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
+ const int16_t (*mvColoc)[2] = 0 == pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurDqLayer->iColocMv[LIST_0] :
+ pCurDqLayer->iColocMv[LIST_1];
+ const int16_t* mv = mvColoc[iColocIdx];
+ if (IS_SUB_8x8 (subMbType)) {
+ if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+ if (iRef[LIST_0] == 0) {
+ ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+ if (pMotionVector != NULL) {
+ ST64 (pMotionVector[LIST_0][iCacheIdx], 0);
+ ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0);
+ }
+ if (pMvdCache != NULL) {
+ ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
+ ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
+ }
+ }
+
+ if (iRef[LIST_1] == 0) {
+ ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+ if (pMotionVector != NULL) {
+ ST64 (pMotionVector[LIST_1][iCacheIdx], 0);
+ ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0);
+ }
+ if (pMvdCache != NULL) {
+ ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
+ ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
+ }
+ }
+ }
+ } else {
+ if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+ if (iRef[LIST_0] == 0) {
+ ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+ ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+ if (pMotionVector != NULL) {
+ ST32 (pMotionVector[LIST_0][iCacheIdx], 0);
+ }
+ if (pMvdCache != NULL) {
+ ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
+ }
+ }
+ if (iRef[LIST_1] == 0) {
+ ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+ ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+ if (pMotionVector != NULL) {
+ ST32 (pMotionVector[LIST_1][iCacheIdx], 0);
+ }
+ if (pMvdCache != NULL) {
+ ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount,
+ const int8_t& iPartW,
+ const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], int16_t pMotionVector[LIST_A][30][MV_A],
+ int16_t pMvdCache[LIST_A][30][MV_A]) {
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+ for (int32_t j = 0; j < iPartCount; j++) {
+ int8_t iPartIdx = iIdx8 + j * iPartW;
+ uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+ uint8_t iColocIdx = g_kuiScan4[iPartIdx];
+ uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+
+ int16_t* mv = mvColoc[iColocIdx];
+
+ int16_t pMV[4] = { 0 };
+ if (IS_SUB_8x8 (subMbType)) {
+ if (!pCurDqLayer->iColocIntra[iColocIdx]) {
+ pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
+ pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
+ }
+ ST32 (pMV, LD32 (pMvDirect[LIST_0]));
+ ST32 ((pMV + 2), LD32 (pMvDirect[LIST_0]));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+ ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+ if (pMotionVector != NULL) {
+ ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
+ ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
+ }
+ if (pMvdCache != NULL) {
+ ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
+ ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
+ }
+ if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+ pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
+ pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
+ }
+ ST32 (pMV, LD32 (pMvDirect[LIST_1]));
+ ST32 ((pMV + 2), LD32 (pMvDirect[LIST_1]));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+ ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+ ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+ ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+ if (pMotionVector != NULL) {
+ ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
+ ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
+ }
+ if (pMvdCache != NULL) {
+ ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
+ ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
+ }
+ } else { //SUB_4x4
+ if (!pCurDqLayer->iColocIntra[iColocIdx]) {
+ pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
+ pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
+ }
+ ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_0]));
+ ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+ if (pMotionVector != NULL) {
+ ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMvDirect[LIST_0]));
+ }
+ if (pMvdCache != NULL) {
+ ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
+ }
+ if (!pCurDqLayer->iColocIntra[iColocIdx]) {
+ pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
+ pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
+ }
+ ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_1]));
+ ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+ if (pMotionVector != NULL) {
+ ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMvDirect[LIST_1]));
+ }
+ if (pMvdCache != NULL) {
+ ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
+ }
+ }
+ }
+}
+int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0,
+ const int32_t& ref0Count) { //ISO/IEC 14496-10:2009(E) (8-193)
+ //When reference is lost, this function must be skipped.
+ if ((pCtx->iErrorCode & dsRefLost) == dsRefLost) {
+ return 0;
+ }
+ PPicture pic1 = pCtx->sRefPic.pRefList[LIST_1][0];
+ if (pic1 && pic1->pRefPic[LIST_0][colocRefIndexL0]) {
+ const int32_t iFramePoc = pic1->pRefPic[LIST_0][colocRefIndexL0]->iFramePoc;
+ for (int32_t i = 0; i < ref0Count; i++) {
+ if (pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc == iFramePoc) {
+ return i;
+ }
+ }
+ }
+ return 0;
+}
+void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef) {
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+ pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 1] =
+ pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx +
+ 5] = iRef;
+
+}
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp
index 65e53ff5bf7..690acd09e17 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp
@@ -35,6 +35,7 @@
#include "mv_pred.h"
#include "error_code.h"
#include <stdio.h>
+
namespace WelsDec {
#define IDX_UNUSED -1
@@ -110,8 +111,8 @@ void UpdateP16x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30],
const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
const uint8_t iCacheIdx6 = 6 + iCacheIdx;
//mb
- ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes);
- ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes);
+ ST32 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes);
+ ST32 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes);
//cache
ST32 (&pRefIndex[iListIdx][iCacheIdx ], iRef4Bytes);
ST32 (&pRefIndex[iListIdx][iCacheIdx6], iRef4Bytes);
@@ -129,8 +130,8 @@ void UpdateP8x16RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30],
const uint8_t iScan4Idx4 = 4 + iScan4Idx;
const uint8_t iCacheIdx6 = 6 + iCacheIdx;
//mb
- ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes);
- ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes);
+ ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes);
+ ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes);
//cache
ST16 (&pRefIndex[iListIdx][iCacheIdx ], iRef2Bytes);
ST16 (&pRefIndex[iListIdx][iCacheIdx6], iRef2Bytes);
@@ -141,8 +142,10 @@ void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30],
const int8_t iListIdx) {
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
- pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] =
- pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 5] = iRef;
+ pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1]
+ =
+ pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx +
+ 5] = iRef;
}
void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx) {
@@ -476,7 +479,7 @@ int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeigh
uint32_t uiCode;
int32_t iIdxA, iIdxB, iCtxInc;
int8_t* pChromaPredMode = pCtx->pCurDqLayer->pChromaPredMode;
- uint32_t* pMbType = pCtx->pCurDqLayer->pMbType;
+ uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType;
int32_t iLeftAvail = uiNeighAvail & 0x04;
int32_t iTopAvail = uiNeighAvail & 0x01;
@@ -532,7 +535,9 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
pRefCount[0] = pSliceHeader->uiRefCount[0];
pRefCount[1] = pSliceHeader->uiRefCount[1];
- switch (pCurDqLayer->pMbType[iMbXy]) {
+ bool bIsPending = GetThreadCount (pCtx) > 1;
+
+ switch (pCurDqLayer->pDec->pMbType[iMbXy]) {
case MB_TYPE_16x16: {
iPartIdx = 0;
WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0,
@@ -547,7 +552,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[0]]
- && ppRefPic[iRef[0]]->bIsComplete);
+ && (ppRefPic[iRef[0]]->bIsComplete || bIsPending));
PredMv (pMotionVector, pRefIndex, LIST_0, 0, 4, iRef[0], pMv);
WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
@@ -573,7 +578,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
- && ppRefPic[iRef[i]]->bIsComplete);
+ && (ppRefPic[iRef[i]]->bIsComplete || bIsPending));
UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
}
for (i = 0; i < 2; i++) {
@@ -603,7 +608,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
- && ppRefPic[iRef[i]]->bIsComplete);
+ && (ppRefPic[iRef[i]]->bIsComplete || bIsPending));
UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
}
for (i = 0; i < 2; i++) {
@@ -651,7 +656,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[pRefIdx[i]]
- && ppRefPic[pRefIdx[i]]->bIsComplete);
+ && (ppRefPic[pRefIdx[i]]->bIsComplete || bIsPending));
UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, pRefIdx[i], LIST_0);
}
//mv
@@ -677,8 +682,8 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (SUB_MB_TYPE_8x8 == uiSubMbType) {
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
- ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx], LD64 (pMv));
- ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv));
+ ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx], LD64 (pMv));
+ ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx], LD64 (pMvd));
ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD64 (pMvd));
ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv));
@@ -688,13 +693,13 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
} else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
- ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD64 (pMv));
+ ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD64 (pMvd));
ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv));
ST64 (pMvdCache[0][iCacheIdx ], LD64 (pMvd));
} else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
- ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
- ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd));
ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD32 (pMvd));
ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv));
@@ -702,7 +707,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd));
ST32 (pMvdCache[0][iCacheIdx + 6], LD32 (pMvd));
} else { //SUB_MB_TYPE_4x4
- ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd));
ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv));
ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd));
@@ -734,22 +739,23 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
pRefCount[0] = pSliceHeader->uiRefCount[0];
pRefCount[1] = pSliceHeader->uiRefCount[1];
- MbType mbType = pCurDqLayer->pMbType[iMbXy];
+ MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy];
+
+ bool bIsPending = GetThreadCount (pCtx) > 1;
if (IS_DIRECT (mbType)) {
int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+ SubMbType subMbType;
if (pSliceHeader->iDirectSpatialMvPredFlag) {
//predict direct spatial mv
- SubMbType subMbType;
int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, subMbType);
if (ret != ERR_NONE) {
return ret;
}
} else {
//temporal direct 16x16 mode
- ComputeColocated (pCtx);
- int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef);
+ int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, subMbType);
if (ret != ERR_NONE) {
return ret;
}
@@ -773,7 +779,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]
- && pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete);
+ && (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete || bIsPending));
}
}
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
@@ -810,7 +816,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx]
- && pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete);
+ && (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending));
}
UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx);
ref_idx_list[listIdx][i] = ref_idx;
@@ -854,7 +860,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx]
- && pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete);
+ && (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending));
}
UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx);
ref_idx_list[listIdx][i] = ref_idx;
@@ -883,11 +889,18 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
uint32_t uiSubMbType;
//sub_mb_type, partition
int16_t pMvDirect[LIST_A][2] = { {0, 0}, {0, 0} };
+ if (pCtx->sRefPic.pRefList[LIST_1][0] == NULL) {
+ SLogContext* pLogCtx = & (pCtx->sLogCtx);
+ WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
+ return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
+ }
+ bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
+ const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]);
bool has_direct_called = false;
SubMbType directSubMbType = 0;
for (int32_t i = 0; i < 4; i++) {
WELS_READ_VERIFY (ParseBSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType));
- if (uiSubMbType > 13) { //invalid sub_mb_type
+ if (uiSubMbType >= 13) { //invalid sub_mb_type
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE);
}
// pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType;
@@ -908,8 +921,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
} else {
//temporal direct mode
- ComputeColocated (pCtx);
- int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef);
+ int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, directSubMbType);
if (ret != ERR_NONE) {
return ret;
}
@@ -926,151 +938,31 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
-
int16_t iIdx8 = i << 2;
if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) {
-
- int8_t iPartCount = pSubPartCount[i];
- int16_t iPartIdx, iBlockW = pPartW[i];
- uint8_t iScan4Idx, iCacheIdx, iColocIdx;
- iCacheIdx = g_kuiCache30ScanIdx[iIdx8];
-
- if (!pSliceHeader->iDirectSpatialMvPredFlag) {
+ if (pSliceHeader->iDirectSpatialMvPredFlag) {
+ FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, bIsLongRef, pMvDirect, iRef,
+ pMotionVector, pMvdCache);
+ } else {
+ int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
iRef[LIST_1] = 0;
- if (pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+ iRef[LIST_0] = 0;
+ const uint8_t uiColoc4Idx = g_kuiScan4[iIdx8];
+ if (!pCurDqLayer->iColocIntra[uiColoc4Idx]) {
iRef[LIST_0] = 0;
- } else {
- if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
- iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8];
+ int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][uiColoc4Idx];
+ if (colocRefIndexL0 >= 0) {
+ iRef[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
} else {
- iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8];
- }
- }
- }
- for (int32_t j = 0; j < iPartCount; j++) {
- iPartIdx = iIdx8 + j * iBlockW;
- iColocIdx = g_kuiScan4[iPartIdx];
- iScan4Idx = g_kuiScan4[iPartIdx];
- iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-
- if (pSliceHeader->iDirectSpatialMvPredFlag) {
- int16_t pMV[4] = { 0 };
- if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
- * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
- ST32 ((pMV + 2), LD32 (pMV));
- ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
- ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
- ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
- ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
- ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
- * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
- ST32 ((pMV + 2), LD32 (pMV));
- ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
- ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
- ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
- ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
- ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
- } else { //SUB_4x4
- * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
- ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
- ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV));
- ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
- * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
- ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
- ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV));
- ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
- }
-
- if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) {
- bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
- uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
- (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
- && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
- const int16_t (*mvColoc)[2] = pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 ? pCurDqLayer->iColocMv[LIST_0] :
- pCurDqLayer->iColocMv[LIST_1];
- const int16_t* mv = mvColoc[iColocIdx];
- if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
- if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
- if (iRef[LIST_0] == 0) {
- ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
- ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
- ST64 (pMotionVector[LIST_0][iCacheIdx], 0);
- ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0);
- ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
- ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
- }
-
- if (iRef[LIST_1] == 0) {
- ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
- ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
- ST64 (pMotionVector[LIST_1][iCacheIdx], 0);
- ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0);
- ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
- ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
- }
- }
- } else {
- if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
- if (iRef[LIST_0] == 0) {
- ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
- ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST32 (pMotionVector[LIST_0][iCacheIdx], 0);
- ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
- }
- if (iRef[LIST_1] == 0) {
- ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
- ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST32 (pMotionVector[LIST_1][iCacheIdx], 0);
- ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
- }
- }
- }
- }
- } else {
- int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
- int16_t* mv = mvColoc[iColocIdx];
- int16_t pMV[4] = { 0 };
- int16_t iMvp[LIST_A][2];
- if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
- iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
- iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
- ST32 (pMV, LD32 (iMvp[LIST_0]));
- ST32 ((pMV + 2), LD32 (iMvp[LIST_0]));
- ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
- iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0];
- iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1];
- ST32 (pMV, LD32 (iMvp[LIST_1]));
- ST32 ((pMV + 2), LD32 (iMvp[LIST_1]));
- ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
- ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
- ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
- ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
- } else { //SUB_4x4
- iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
- iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
- ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (iMvp[LIST_0]));
- ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
- iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0];
- iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1];
- ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (iMvp[LIST_1]));
- ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+ mvColoc = pCurDqLayer->iColocMv[LIST_1];
}
}
+ Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_0, iRef[LIST_0]);
+ Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_1, iRef[LIST_1]);
+ UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, LIST_0, iRef[LIST_0]);
+ UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, LIST_1, iRef[LIST_1]);
+ FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, iRef, mvColoc, pMotionVector,
+ pMvdCache);
}
}
}
@@ -1083,18 +975,8 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
int8_t iref = REF_NOT_IN_LIST;
if (IS_DIRECT (subMbType)) {
if (pSliceHeader->iDirectSpatialMvPredFlag) {
- iref = iRef[listIdx];
- } else {
- iref = 0;
- if (listIdx == LIST_0) {
- if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
- if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
- iref = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8];
- } else {
- iref = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8];
- }
- }
- }
+ Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iRef[listIdx]);
+ ref_idx_list[listIdx][i] = iRef[listIdx];
}
UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
} else {
@@ -1112,31 +994,32 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][iref]
- && pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete);
+ && (pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete || bIsPending));
}
+ Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iref);
+ ref_idx_list[listIdx][i] = iref;
}
- UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, iref, listIdx);
- ref_idx_list[listIdx][i] = iref;
}
}
//mv
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
for (int32_t i = 0; i < 4; i++) {
- int8_t iPartCount = pSubPartCount[i];
- int16_t iPartIdx, iBlockW = pPartW[i];
- uint8_t iScan4Idx, iCacheIdx;
+ int16_t iIdx8 = i << 2;
- iCacheIdx = g_kuiCache30ScanIdx[i << 2];
+ uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
+ if (IS_DIRECT (subMbType) && !pSliceHeader->iDirectSpatialMvPredFlag)
+ continue;
int8_t iref = ref_idx_list[listIdx][i];
- pRefIndex[listIdx][iCacheIdx] = pRefIndex[listIdx][iCacheIdx + 1]
- = pRefIndex[listIdx][iCacheIdx + 6] = pRefIndex[listIdx][iCacheIdx + 7] = iref;
+ UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, listIdx, iref);
- uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
- if (IS_DIRECT (subMbType)) {
+ if (IS_DIRECT (subMbType))
continue;
- }
+
bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0;
+ int8_t iPartCount = pSubPartCount[i];
+ int16_t iBlockW = pPartW[i];
+ uint8_t iScan4Idx, iCacheIdx;
for (int32_t j = 0; j < iPartCount; j++) {
iPartIdx = (i << 2) + j * iBlockW;
iScan4Idx = g_kuiScan4[iPartIdx];
@@ -1154,8 +1037,8 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
- ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
- ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv));
+ ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
+ ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd));
ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMvd));
ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv));
@@ -1163,13 +1046,13 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd));
ST64 (pMvdCache[listIdx][iCacheIdx + 6], LD64 (pMvd));
} else if (IS_SUB_4x4 (subMbType)) { //MB_TYPE_4x4
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd));
ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv));
ST32 (pMvdCache[listIdx][iCacheIdx], LD32 (pMvd));
} else if (IS_SUB_4x8 (subMbType)) { //MB_TYPE_4x8 5, 7, 9
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
- ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd));
ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMvd));
ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv));
@@ -1179,7 +1062,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
} else { //MB_TYPE_8x4 4, 6, 8
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
- ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
+ ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd));
ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv));
ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd));
@@ -1201,7 +1084,7 @@ int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail,
uint32_t uiCode;
int32_t iIdxA = 0, iIdxB = 0;
int32_t iCtxInc = 0;
- int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pRefIndex[iListIdx][pCtx->pCurDqLayer->iMbXyIndex];
+ int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pDec->pRefIndex[iListIdx][pCtx->pCurDqLayer->iMbXyIndex];
int8_t* pDirect = pCtx->pCurDqLayer->pDirect[pCtx->pCurDqLayer->iMbXyIndex];
if (iZOrderIdx == 0) {
iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM
@@ -1394,7 +1277,7 @@ int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int3
int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring
int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring
uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
- uint32_t* pMbType = pCtx->pCurDqLayer->pMbType;
+ uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType;
int32_t iCtxInc;
uiCbfBit = 0;
nA = nB = (int8_t)!!IS_INTRA (pMbType[iCurrBlkXy]);
@@ -1617,12 +1500,12 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
int32_t i;
PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
SBitStringAux* pBsAux = pCtx->pCurDqLayer->pBitStringAux;
- SDqLayer* pCurLayer = pCtx->pCurDqLayer;
- int32_t iDstStrideLuma = pCurLayer->pDec->iLinesize[0];
- int32_t iDstStrideChroma = pCurLayer->pDec->iLinesize[1];
- int32_t iMbX = pCurLayer->iMbX;
- int32_t iMbY = pCurLayer->iMbY;
- int32_t iMbXy = pCurLayer->iMbXyIndex;
+ SDqLayer* pCurDqLayer = pCtx->pCurDqLayer;
+ int32_t iDstStrideLuma = pCurDqLayer->pDec->iLinesize[0];
+ int32_t iDstStrideChroma = pCurDqLayer->pDec->iLinesize[1];
+ int32_t iMbX = pCurDqLayer->iMbX;
+ int32_t iMbY = pCurDqLayer->iMbY;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t iMbOffsetLuma = (iMbX + iMbY * iDstStrideLuma) << 4;
int32_t iMbOffsetChroma = (iMbX + iMbY * iDstStrideChroma) << 3;
@@ -1633,7 +1516,7 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
uint8_t* pPtrSrc;
- pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+ pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
RestoreCabacDecEngineToBS (pCabacDecEngine, pBsAux);
intX_t iBytesLeft = pBsAux->pEndBuf - pBsAux->pCurBuf;
if (iBytesLeft < 384) {
@@ -1660,13 +1543,19 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
pBsAux->pCurBuf += 384;
- pCurLayer->pLumaQp[iMbXy] = 0;
- pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
- memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy]));
+ pCurDqLayer->pLumaQp[iMbXy] = 0;
+ pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0;
+ memset (pCurDqLayer->pNzc[iMbXy], 16, sizeof (pCurDqLayer->pNzc[iMbXy]));
//step 4: cabac engine init
WELS_READ_VERIFY (InitReadBits (pBsAux, 1));
WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCabacDecEngine, pBsAux));
return ERR_NONE;
}
+void UpdateP8x8RefCacheIdxCabac (int8_t pRefIndex[LIST_A][30], const int16_t& iPartIdx, // Stores iRef into four slots of the pRefIndex[listIdx] cache row; returns nothing.
+ const int32_t& listIdx, const int8_t& iRef) { // listIdx selects the row of pRefIndex; iRef is the value written to every slot.
+ const uint8_t uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; // Look up the first cache slot for iPartIdx; iPartIdx is used unchecked as an index into g_kuiCache30ScanIdx.
+ pRefIndex[listIdx][uiCacheIdx] = pRefIndex[listIdx][uiCacheIdx + 1] = pRefIndex[listIdx][uiCacheIdx + 6] = // Chained assignment: slots +0, +1, +6 and +7 all receive iRef.
+ pRefIndex[listIdx][uiCacheIdx + 7] = iRef; // NOTE(review): the +6/+7 offsets presumably address the next row of a 6-wide cache (a 2x2 group of 4x4 blocks) - confirm against the cache layout.
+}
}
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
index fc44b65e7e3..ba3a46d58a4 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
@@ -53,20 +53,20 @@ typedef struct TagReadBitsCache {
uint8_t* pBuf;
} SReadBitsCache;
-void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) {
+void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer) {
int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
int32_t iCurXy, iTopXy = 0, iLeftXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
int32_t iCurX, iCurY;
- iCurXy = pCurLayer->iMbXyIndex;
- iCurX = pCurLayer->iMbX;
- iCurY = pCurLayer->iMbY;
- iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];
+ iCurXy = pCurDqLayer->iMbXyIndex;
+ iCurX = pCurDqLayer->iMbX;
+ iCurY = pCurDqLayer->iMbY;
+ iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
if (iCurX != 0) {
iLeftXy = iCurXy - 1;
- iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
+ iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
pNeighAvail->iLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
- pNeighAvail->iLeftCbp = pNeighAvail->iLeftAvail ? pCurLayer->pCbp[iLeftXy] : 0;
+ pNeighAvail->iLeftCbp = pNeighAvail->iLeftAvail ? pCurDqLayer->pCbp[iLeftXy] : 0;
} else {
pNeighAvail->iLeftAvail = 0;
pNeighAvail->iLeftTopAvail = 0;
@@ -74,20 +74,20 @@ void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) {
}
if (iCurY != 0) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
- iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
+ iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
pNeighAvail->iTopAvail = (iTopSliceIdc == iCurSliceIdc);
- pNeighAvail->iTopCbp = pNeighAvail->iTopAvail ? pCurLayer->pCbp[iTopXy] : 0;
+ pNeighAvail->iTopCbp = pNeighAvail->iTopAvail ? pCurDqLayer->pCbp[iTopXy] : 0;
if (iCurX != 0) {
iLeftTopXy = iTopXy - 1;
- iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
+ iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
pNeighAvail->iLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
} else {
pNeighAvail->iLeftTopAvail = 0;
}
- if (iCurX != (pCurLayer->iMbWidth - 1)) {
+ if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
iRightTopXy = iTopXy + 1;
- iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
+ iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
pNeighAvail->iRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
} else {
pNeighAvail->iRightTopAvail = 0;
@@ -99,18 +99,18 @@ void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) {
pNeighAvail->iTopCbp = 0;
}
- pNeighAvail->iLeftType = (pNeighAvail->iLeftAvail ? pCurLayer->pMbType[iLeftXy] : 0);
- pNeighAvail->iTopType = (pNeighAvail->iTopAvail ? pCurLayer->pMbType[iTopXy] : 0);
- pNeighAvail->iLeftTopType = (pNeighAvail->iLeftTopAvail ? pCurLayer->pMbType[iLeftTopXy] : 0);
- pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? pCurLayer->pMbType[iRightTopXy] : 0);
+ pNeighAvail->iLeftType = (pNeighAvail->iLeftAvail ? pCurDqLayer->pDec->pMbType[iLeftXy] : 0);
+ pNeighAvail->iTopType = (pNeighAvail->iTopAvail ? pCurDqLayer->pDec->pMbType[iTopXy] : 0);
+ pNeighAvail->iLeftTopType = (pNeighAvail->iLeftTopAvail ? pCurDqLayer->pDec->pMbType[iLeftTopXy] : 0);
+ pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? pCurDqLayer->pDec->pMbType[iRightTopXy] : 0);
}
void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
- PDqLayer pCurLayer) { //no matter slice type, intra_pred_constrained_flag
- int32_t iCurXy = pCurLayer->iMbXyIndex;
+ PDqLayer pCurDqLayer) { //no matter slice type, intra_pred_constrained_flag
+ int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
if (pNeighAvail->iTopAvail) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
@@ -118,10 +118,10 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo
//stuff non_zero_coeff_count from pNeighAvail(left and top)
if (pNeighAvail->iTopAvail) {
- ST32 (&pNonZeroCount[1], LD32 (&pCurLayer->pNzc[iTopXy][12]));
+ ST32 (&pNonZeroCount[1], LD32 (&pCurDqLayer->pNzc[iTopXy][12]));
pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0;
- ST16 (&pNonZeroCount[6], LD16 (&pCurLayer->pNzc[iTopXy][20]));
- ST16 (&pNonZeroCount[30], LD16 (&pCurLayer->pNzc[iTopXy][22]));
+ ST16 (&pNonZeroCount[6], LD16 (&pCurDqLayer->pNzc[iTopXy][20]));
+ ST16 (&pNonZeroCount[30], LD16 (&pCurDqLayer->pNzc[iTopXy][22]));
} else {
ST32 (&pNonZeroCount[1], 0xFFFFFFFFU);
pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0xFF;
@@ -130,15 +130,15 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo
}
if (pNeighAvail->iLeftAvail) {
- pNonZeroCount[8 * 1] = pCurLayer->pNzc[iLeftXy][3];
- pNonZeroCount[8 * 2] = pCurLayer->pNzc[iLeftXy][7];
- pNonZeroCount[8 * 3] = pCurLayer->pNzc[iLeftXy][11];
- pNonZeroCount[8 * 4] = pCurLayer->pNzc[iLeftXy][15];
-
- pNonZeroCount[5 + 8 * 1] = pCurLayer->pNzc[iLeftXy][17];
- pNonZeroCount[5 + 8 * 2] = pCurLayer->pNzc[iLeftXy][21];
- pNonZeroCount[5 + 8 * 4] = pCurLayer->pNzc[iLeftXy][19];
- pNonZeroCount[5 + 8 * 5] = pCurLayer->pNzc[iLeftXy][23];
+ pNonZeroCount[8 * 1] = pCurDqLayer->pNzc[iLeftXy][3];
+ pNonZeroCount[8 * 2] = pCurDqLayer->pNzc[iLeftXy][7];
+ pNonZeroCount[8 * 3] = pCurDqLayer->pNzc[iLeftXy][11];
+ pNonZeroCount[8 * 4] = pCurDqLayer->pNzc[iLeftXy][15];
+
+ pNonZeroCount[5 + 8 * 1] = pCurDqLayer->pNzc[iLeftXy][17];
+ pNonZeroCount[5 + 8 * 2] = pCurDqLayer->pNzc[iLeftXy][21];
+ pNonZeroCount[5 + 8 * 4] = pCurDqLayer->pNzc[iLeftXy][19];
+ pNonZeroCount[5 + 8 * 5] = pCurDqLayer->pNzc[iLeftXy][23];
} else {
pNonZeroCount[8 * 1] =
pNonZeroCount[8 * 2] =
@@ -153,16 +153,16 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo
}
}
void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
- PDqLayer pCurLayer) { //no matter slice type
- int32_t iCurXy = pCurLayer->iMbXyIndex;
+ PDqLayer pCurDqLayer) { //no matter slice type
+ int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
//stuff non_zero_coeff_count from pNeighAvail(left and top)
- WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+ WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
@@ -170,7 +170,7 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
//intraNxN_pred_mode
if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top
- ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0]));
+ ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0]));
} else {
int32_t iPred;
if (IS_INTRA16x16 (pNeighAvail->iTopType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iTopType))
@@ -181,10 +181,10 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left
- pIntraPredMode[ 0 + 8 ] = pCurLayer->pIntraPredMode[iLeftXy][4];
- pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5];
- pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6];
- pIntraPredMode[ 0 + 8 * 4] = pCurLayer->pIntraPredMode[iLeftXy][3];
+ pIntraPredMode[ 0 + 8 ] = pCurDqLayer->pIntraPredMode[iLeftXy][4];
+ pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5];
+ pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6];
+ pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3];
} else {
int8_t iPred;
if (IS_INTRA16x16 (pNeighAvail->iLeftType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iLeftType))
@@ -199,16 +199,16 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
- PDqLayer pCurLayer) { //no matter slice type
- int32_t iCurXy = pCurLayer->iMbXyIndex;
+ PDqLayer pCurDqLayer) { //no matter slice type
+ int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
//stuff non_zero_coeff_count from pNeighAvail(left and top)
- WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+ WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
@@ -216,7 +216,7 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
//intra4x4_pred_mode
if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top
- ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0]));
+ ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0]));
} else {
int32_t iPred;
if (pNeighAvail->iTopAvail)
@@ -227,10 +227,10 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left
- pIntraPredMode[ 0 + 8 * 1] = pCurLayer->pIntraPredMode[iLeftXy][4];
- pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5];
- pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6];
- pIntraPredMode[ 0 + 8 * 4] = pCurLayer->pIntraPredMode[iLeftXy][3];
+ pIntraPredMode[ 0 + 8 * 1] = pCurDqLayer->pIntraPredMode[iLeftXy][4];
+ pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5];
+ pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6];
+ pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3];
} else {
int8_t iPred;
if (pNeighAvail->iLeftAvail)
@@ -245,52 +245,52 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int16_t iMvArray[LIST_A][30][MV_A],
- int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) {
- int32_t iCurXy = pCurLayer->iMbXyIndex;
+ int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) {
+ int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
int32_t iLeftTopXy = 0;
int32_t iRightTopXy = 0;
- PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
int32_t listCount = 1;
if (pSliceHeader->eSliceType == B_SLICE) {
listCount = 2;
}
//stuff non_zero_coeff_count from pNeighAvail(left and top)
- WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+ WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
}
if (pNeighAvail->iLeftTopAvail) {
- iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
+ iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iRightTopAvail) {
- iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
+ iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth;
}
for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) {
//stuff mv_cache and iRefIdxArray from left and top (inter)
if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
- ST32 (iMvArray[listIdx][6], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
- ST32 (iMvArray[listIdx][12], LD32 (pCurLayer->pMv[listIdx][iLeftXy][7]));
- ST32 (iMvArray[listIdx][18], LD32 (pCurLayer->pMv[listIdx][iLeftXy][11]));
- ST32 (iMvArray[listIdx][24], LD32 (pCurLayer->pMv[listIdx][iLeftXy][15]));
-
- ST32 (iMvdCache[listIdx][6], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][3]));
- ST32 (iMvdCache[listIdx][12], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][7]));
- ST32 (iMvdCache[listIdx][18], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][11]));
- ST32 (iMvdCache[listIdx][24], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][15]));
-
- iRefIdxArray[listIdx][6] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
- iRefIdxArray[listIdx][12] = pCurLayer->pRefIndex[listIdx][iLeftXy][7];
- iRefIdxArray[listIdx][18] = pCurLayer->pRefIndex[listIdx][iLeftXy][11];
- iRefIdxArray[listIdx][24] = pCurLayer->pRefIndex[listIdx][iLeftXy][15];
+ ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3]));
+ ST32 (iMvArray[listIdx][12], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7]));
+ ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11]));
+ ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15]));
+
+ ST32 (iMvdCache[listIdx][6], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][3]));
+ ST32 (iMvdCache[listIdx][12], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][7]));
+ ST32 (iMvdCache[listIdx][18], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][11]));
+ ST32 (iMvdCache[listIdx][24], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][15]));
+
+ iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3];
+ iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7];
+ iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11];
+ iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15];
} else {
ST32 (iMvArray[listIdx][6], 0);
ST32 (iMvArray[listIdx][12], 0);
@@ -316,9 +316,9 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
}
if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
- ST32 (iMvArray[listIdx][0], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
- ST32 (iMvdCache[listIdx][0], LD32 (pCurLayer->pMvd[listIdx][iLeftTopXy][15]));
- iRefIdxArray[listIdx][0] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
+ ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15]));
+ ST32 (iMvdCache[listIdx][0], LD32 (pCurDqLayer->pMvd[listIdx][iLeftTopXy][15]));
+ iRefIdxArray[listIdx][0] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15];
} else {
ST32 (iMvArray[listIdx][0], 0);
ST32 (iMvdCache[listIdx][0], 0);
@@ -330,11 +330,11 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
- ST64 (iMvArray[listIdx][1], LD64 (pCurLayer->pMv[listIdx][iTopXy][12]));
- ST64 (iMvArray[listIdx][3], LD64 (pCurLayer->pMv[listIdx][iTopXy][14]));
- ST64 (iMvdCache[listIdx][1], LD64 (pCurLayer->pMvd[listIdx][iTopXy][12]));
- ST64 (iMvdCache[listIdx][3], LD64 (pCurLayer->pMvd[listIdx][iTopXy][14]));
- ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurLayer->pRefIndex[listIdx][iTopXy][12]));
+ ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12]));
+ ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14]));
+ ST64 (iMvdCache[listIdx][1], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][12]));
+ ST64 (iMvdCache[listIdx][3], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][14]));
+ ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12]));
} else {
ST64 (iMvArray[listIdx][1], 0);
ST64 (iMvArray[listIdx][3], 0);
@@ -354,9 +354,9 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
- ST32 (iMvArray[listIdx][5], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
- ST32 (iMvdCache[listIdx][5], LD32 (pCurLayer->pMvd[listIdx][iRightTopXy][12]));
- iRefIdxArray[listIdx][5] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
+ ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12]));
+ ST32 (iMvdCache[listIdx][5], LD32 (pCurDqLayer->pMvd[listIdx][iRightTopXy][12]));
+ iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12];
} else {
ST32 (iMvArray[listIdx][5], 0);
if (0 == pNeighAvail->iRightTopAvail) { //not available
@@ -385,151 +385,160 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
}
-void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer) {
+void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer) {
- int32_t iCurXy = pCurLayer->iMbXyIndex;
+ int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
int32_t iLeftTopXy = 0;
int32_t iRightTopXy = 0;
if (pNeighAvail->iTopAvail) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
}
if (pNeighAvail->iLeftTopAvail) {
- iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
+ iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iRightTopAvail) {
- iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
+ iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth;
}
memset (iDirect, 0, 30);
if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
- iDirect[6] = pCurLayer->pDirect[iLeftXy][3];
- iDirect[12] = pCurLayer->pDirect[iLeftXy][7];
- iDirect[18] = pCurLayer->pDirect[iLeftXy][11];
- iDirect[24] = pCurLayer->pDirect[iLeftXy][15];
+ iDirect[6] = pCurDqLayer->pDirect[iLeftXy][3];
+ iDirect[12] = pCurDqLayer->pDirect[iLeftXy][7];
+ iDirect[18] = pCurDqLayer->pDirect[iLeftXy][11];
+ iDirect[24] = pCurDqLayer->pDirect[iLeftXy][15];
}
if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
- iDirect[0] = pCurLayer->pDirect[iLeftTopXy][15];
+ iDirect[0] = pCurDqLayer->pDirect[iLeftTopXy][15];
}
if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
- ST32 (&iDirect[1], LD32 (&pCurLayer->pDirect[iTopXy][12]));
+ ST32 (&iDirect[1], LD32 (&pCurDqLayer->pDirect[iTopXy][12]));
}
if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
- iDirect[5] = pCurLayer->pDirect[iRightTopXy][12];
+ iDirect[5] = pCurDqLayer->pDirect[iRightTopXy][12];
}
//right-top 4*4 block unavailable
}
void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
- int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) {
- int32_t iCurXy = pCurLayer->iMbXyIndex;
+ int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) {
+ int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
int32_t iLeftTopXy = 0;
int32_t iRightTopXy = 0;
+ PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+ PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+ int32_t listCount = 1;
+ if (pSliceHeader->eSliceType == B_SLICE) {
+ listCount = 2;
+ }
+
//stuff non_zero_coeff_count from pNeighAvail(left and top)
- WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+ WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
- iTopXy = iCurXy - pCurLayer->iMbWidth;
+ iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
}
if (pNeighAvail->iLeftTopAvail) {
- iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
+ iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iRightTopAvail) {
- iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
+ iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth;
}
- //stuff mv_cache and iRefIdxArray from left and top (inter)
- if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
- ST32 (iMvArray[0][ 6], LD32 (pCurLayer->pMv[0][iLeftXy][ 3]));
- ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7]));
- ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11]));
- ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15]));
- iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3];
- iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7];
- iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11];
- iRefIdxArray[0][24] = pCurLayer->pRefIndex[0][iLeftXy][15];
- } else {
- ST32 (iMvArray[0][ 6], 0);
- ST32 (iMvArray[0][12], 0);
- ST32 (iMvArray[0][18], 0);
- ST32 (iMvArray[0][24], 0);
-
- if (0 == pNeighAvail->iLeftAvail) { //not available
- iRefIdxArray[0][ 6] =
- iRefIdxArray[0][12] =
- iRefIdxArray[0][18] =
- iRefIdxArray[0][24] = REF_NOT_AVAIL;
- } else { //available but is intra mb type
- iRefIdxArray[0][ 6] =
- iRefIdxArray[0][12] =
- iRefIdxArray[0][18] =
- iRefIdxArray[0][24] = REF_NOT_IN_LIST;
+ for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) {
+ //stuff mv_cache and iRefIdxArray from left and top (inter)
+ if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
+ ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3]));
+ ST32 (iMvArray[listIdx][12], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7]));
+ ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11]));
+ ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15]));
+ iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3];
+ iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7];
+ iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11];
+ iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15];
+ } else {
+ ST32 (iMvArray[listIdx][6], 0);
+ ST32 (iMvArray[listIdx][12], 0);
+ ST32 (iMvArray[listIdx][18], 0);
+ ST32 (iMvArray[listIdx][24], 0);
+
+ if (0 == pNeighAvail->iLeftAvail) { //not available
+ iRefIdxArray[listIdx][6] =
+ iRefIdxArray[listIdx][12] =
+ iRefIdxArray[listIdx][18] =
+ iRefIdxArray[listIdx][24] = REF_NOT_AVAIL;
+ } else { //available but is intra mb type
+ iRefIdxArray[listIdx][6] =
+ iRefIdxArray[listIdx][12] =
+ iRefIdxArray[listIdx][18] =
+ iRefIdxArray[listIdx][24] = REF_NOT_IN_LIST;
+ }
}
- }
- if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
- ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
- iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
- } else {
- ST32 (iMvArray[0][0], 0);
- if (0 == pNeighAvail->iLeftTopAvail) { //not available
- iRefIdxArray[0][0] = REF_NOT_AVAIL;
- } else { //available but is intra mb type
- iRefIdxArray[0][0] = REF_NOT_IN_LIST;
+ if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
+ ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15]));
+ iRefIdxArray[listIdx][0] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15];
+ } else {
+ ST32 (iMvArray[listIdx][0], 0);
+ if (0 == pNeighAvail->iLeftTopAvail) { //not available
+ iRefIdxArray[listIdx][0] = REF_NOT_AVAIL;
+ } else { //available but is intra mb type
+ iRefIdxArray[listIdx][0] = REF_NOT_IN_LIST;
+ }
}
- }
- if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
- ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
- ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
- ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
- } else {
- ST64 (iMvArray[0][1], 0);
- ST64 (iMvArray[0][3], 0);
- if (0 == pNeighAvail->iTopAvail) { //not available
- iRefIdxArray[0][1] =
- iRefIdxArray[0][2] =
- iRefIdxArray[0][3] =
- iRefIdxArray[0][4] = REF_NOT_AVAIL;
- } else { //available but is intra mb type
- iRefIdxArray[0][1] =
- iRefIdxArray[0][2] =
- iRefIdxArray[0][3] =
- iRefIdxArray[0][4] = REF_NOT_IN_LIST;
+ if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
+ ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12]));
+ ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14]));
+ ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12]));
+ } else {
+ ST64 (iMvArray[listIdx][1], 0);
+ ST64 (iMvArray[listIdx][3], 0);
+ if (0 == pNeighAvail->iTopAvail) { //not available
+ iRefIdxArray[listIdx][1] =
+ iRefIdxArray[listIdx][2] =
+ iRefIdxArray[listIdx][3] =
+ iRefIdxArray[listIdx][4] = REF_NOT_AVAIL;
+ } else { //available but is intra mb type
+ iRefIdxArray[listIdx][1] =
+ iRefIdxArray[listIdx][2] =
+ iRefIdxArray[listIdx][3] =
+ iRefIdxArray[listIdx][4] = REF_NOT_IN_LIST;
+ }
}
- }
- if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
- ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
- iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
- } else {
- ST32 (iMvArray[0][5], 0);
- if (0 == pNeighAvail->iRightTopAvail) { //not available
- iRefIdxArray[0][5] = REF_NOT_AVAIL;
- } else { //available but is intra mb type
- iRefIdxArray[0][5] = REF_NOT_IN_LIST;
+ if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
+ ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12]));
+ iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12];
+ } else {
+ ST32 (iMvArray[listIdx][5], 0);
+ if (0 == pNeighAvail->iRightTopAvail) { //not available
+ iRefIdxArray[listIdx][5] = REF_NOT_AVAIL;
+ } else { //available but is intra mb type
+ iRefIdxArray[listIdx][5] = REF_NOT_IN_LIST;
+ }
}
+ //right-top 4*4 block unavailable
+ ST32 (iMvArray[listIdx][9], 0);
+ ST32 (iMvArray[listIdx][21], 0);
+ ST32 (iMvArray[listIdx][11], 0);
+ ST32 (iMvArray[listIdx][17], 0);
+ ST32 (iMvArray[listIdx][23], 0);
+ iRefIdxArray[listIdx][9] =
+ iRefIdxArray[listIdx][21] =
+ iRefIdxArray[listIdx][11] =
+ iRefIdxArray[listIdx][17] =
+ iRefIdxArray[listIdx][23] = REF_NOT_AVAIL;
}
- //right-top 4*4 block unavailable
- ST32 (iMvArray[0][ 9], 0);
- ST32 (iMvArray[0][21], 0);
- ST32 (iMvArray[0][11], 0);
- ST32 (iMvArray[0][17], 0);
- ST32 (iMvArray[0][23], 0);
- iRefIdxArray[0][ 9] =
- iRefIdxArray[0][21] =
- iRefIdxArray[0][11] =
- iRefIdxArray[0][17] =
- iRefIdxArray[0][23] = REF_NOT_AVAIL;
}
int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4) {
@@ -1074,7 +1083,9 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
iRefCount[0] = pSliceHeader->uiRefCount[0];
iRefCount[1] = pSliceHeader->uiRefCount[1];
- switch (pCurDqLayer->pMbType[iMbXy]) {
+ bool bIsPending = GetThreadCount (pCtx) > 1;
+
+ switch (pCurDqLayer->pDec->pMbType[iMbXy]) {
case MB_TYPE_16x16: {
int32_t iRefIdx = 0;
if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
@@ -1096,7 +1107,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx]
- && ppRefPic[iRefIdx]->bIsComplete);
+ && (ppRefPic[iRefIdx]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1137,7 +1148,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
- && ppRefPic[iRefIdx[i]]->bIsComplete);
+ && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending));
}
for (i = 0; i < 2; i++) {
PredInter16x8Mv (iMvArray, iRefIdxArray, LIST_0, i << 3, iRefIdx[i], iMv);
@@ -1174,7 +1185,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
- && ppRefPic[iRefIdx[i]]->bIsComplete);
+ && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1198,7 +1209,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
int32_t iRefIdx[4] = {0}, iSubPartCount[4], iPartWidth[4];
uint32_t uiSubMbType;
- if (MB_TYPE_8x8_REF0 == pCurDqLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) {
iRefCount[0] =
iRefCount[1] = 1;
}
@@ -1226,8 +1237,8 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
//iRefIdxArray
- if (MB_TYPE_8x8_REF0 == pCurDqLayer->pMbType[iMbXy]) {
- memset (pCurDqLayer->pRefIndex[0][iMbXy], 0, 16);
+ if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) {
+ memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, 16);
} else {
for (i = 0; i < 4; i++) {
int16_t iIndex8 = i << 2;
@@ -1246,10 +1257,11 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
- && ppRefPic[iRefIdx[i]]->bIsComplete);
+ && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending));
- pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 1] =
- pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 5] = iRefIdx[i];
+ pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 1] =
+ pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 5] =
+ iRefIdx[i];
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1281,26 +1293,26 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
iMv[1] += iCode;
WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
if (SUB_MB_TYPE_8x8 == uiSubMbType) {
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx], LD32 (iMv));
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 5], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 5], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 7], LD32 (iMv));
} else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv));
} else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv));
} else { //SUB_MB_TYPE_4x4 == uiSubMbType
- ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
}
}
@@ -1313,5 +1325,401 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
return ERR_NONE;
}
+int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A],
+ int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs) {
+ PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
+ PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+ PPicture* ppRefPic[2];
+ ppRefPic[LIST_0] = pCtx->sRefPic.pRefList[LIST_0];
+ ppRefPic[LIST_1] = pCtx->sRefPic.pRefList[LIST_1];
+ int8_t ref_idx_list[LIST_A][4];
+ int8_t iRef[2] = { 0, 0 };
+ int32_t iRefCount[2];
+ PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ uint8_t iMotionPredFlag[LIST_A][4];
+ int16_t iMv[2];
+ uint32_t uiCode;
+ int32_t iCode;
+ int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv;
+ int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv;
+ memset (ref_idx_list, -1, LIST_A * 4);
+ memset (iMotionPredFlag, (pSlice->sSliceHeaderExt.bDefaultMotionPredFlag ? 1 : 0), LIST_A * 4);
+ iRefCount[0] = pSliceHeader->uiRefCount[0];
+ iRefCount[1] = pSliceHeader->uiRefCount[1];
+ bool bIsPending = GetThreadCount (pCtx) > 1;
+
+ MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy];
+ if (IS_DIRECT (mbType)) {
+
+ int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+ SubMbType subMbType;
+ if (pSliceHeader->iDirectSpatialMvPredFlag) {
+ //predict direct spatial mv
+ int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, subMbType);
+ if (ret != ERR_NONE) {
+ return ret;
+ }
+ } else {
+ //temporal direct 16x16 mode
+ int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, subMbType);
+ if (ret != ERR_NONE) {
+ return ret;
+ }
+ }
+ } else if (IS_INTER_16x16 (mbType)) {
+ if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ if (IS_DIR (mbType, 0, listIdx)) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+ iMotionPredFlag[listIdx][0] = uiCode;
+ }
+ }
+ }
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ if (IS_DIR (mbType, 0, listIdx)) {
+ if (iMotionPredFlag[listIdx][0] == 0) {
+ WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0/l1[ mbPartIdx ]
+ ref_idx_list[listIdx][0] = uiCode;
+ // Security check: ref_idx should be in range 0 to num_ref_idx_l0/l1_active_minus1, inclusive
+ // ref to standard section 7.4.5.1. iRefCount[listIdx] is 1 + num_ref_idx_l0/l1_active_minus1.
+ if ((ref_idx_list[listIdx][0] < 0) || (ref_idx_list[listIdx][0] >= iRefCount[listIdx])
+ || (ppRefPic[listIdx][ref_idx_list[listIdx][0]] == NULL)) { //error ref_idx
+ pCtx->bMbRefConcealed = true;
+ if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+ ref_idx_list[listIdx][0] = 0;
+ pCtx->iErrorCode |= dsBitstreamError;
+ } else {
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+ }
+ }
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][ref_idx_list[listIdx][0]]
+ && (ppRefPic[listIdx][ref_idx_list[listIdx][0]]->bIsComplete || bIsPending));
+ } else {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+ }
+ }
+ }
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ if (IS_DIR (mbType, 0, listIdx)) {
+ PredMv (iMvArray, iRefIdxArray, listIdx, 0, 4, ref_idx_list[listIdx][0], iMv);
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ 0 ][ compIdx = 0 ] (horizontal)
+ iMv[0] += iCode;
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ 0 ][ compIdx = 1 ] (vertical)
+ iMv[1] += iCode;
+ WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+ } else {
+ * (uint32_t*)iMv = 0;
+ }
+ UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref_idx_list[listIdx][0], iMv);
+ }
+ } else if (IS_INTER_16x8 (mbType)) {
+ if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 2; ++i) {
+ if (IS_DIR (mbType, i, listIdx)) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+ iMotionPredFlag[listIdx][i] = uiCode;
+ }
+ }
+ }
+ }
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 2; ++i) {
+ if (IS_DIR (mbType, i, listIdx)) {
+ if (iMotionPredFlag[listIdx][i] == 0) {
+ WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0/l1[ mbPartIdx ]
+ int32_t iRefIdx = uiCode;
+ // Security check: iRefIdx should be in range 0 to num_ref_idx_l0/l1_active_minus1, inclusive
+ // ref to standard section 7.4.5.1. iRefCount[listIdx] is 1 + num_ref_idx_l0/l1_active_minus1.
+ if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx
+ pCtx->bMbRefConcealed = true;
+ if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+ iRefIdx = 0;
+ pCtx->iErrorCode |= dsBitstreamError;
+ } else {
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+ }
+ }
+ ref_idx_list[listIdx][i] = iRefIdx;
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx]
+ && (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending));
+ } else {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+ }
+ }
+ }
+ }
+ // Read mvd_L0 then mvd_L1
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ // Partitions
+ for (int32_t i = 0; i < 2; i++) {
+ int iPartIdx = i << 3;
+ int32_t iRefIdx = ref_idx_list[listIdx][i];
+ if (IS_DIR (mbType, i, listIdx)) {
+ PredInter16x8Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ listIdx ][x]
+ iMv[0] += iCode;
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ listIdx ][y]
+ iMv[1] += iCode;
+
+ WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+ } else {
+ * (uint32_t*)iMv = 0;
+ }
+ UpdateP16x8MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+ }
+ }
+ } else if (IS_INTER_8x16 (mbType)) {
+ if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 2; ++i) {
+ if (IS_DIR (mbType, i, listIdx)) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+ iMotionPredFlag[listIdx][i] = uiCode;
+ }
+ }
+ }
+ }
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 2; ++i) {
+ if (IS_DIR (mbType, i, listIdx)) {
+ if (iMotionPredFlag[listIdx][i] == 0) {
+ WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0/l1[ mbPartIdx ]
+ int32_t iRefIdx = uiCode;
+ // Security check: iRefIdx should be in range 0 to num_ref_idx_l0/l1_active_minus1, inclusive
+ // ref to standard section 7.4.5.1. iRefCount[listIdx] is 1 + num_ref_idx_l0/l1_active_minus1.
+ if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx
+ pCtx->bMbRefConcealed = true;
+ if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+ iRefIdx = 0;
+ pCtx->iErrorCode |= dsBitstreamError;
+ } else {
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+ }
+ }
+ ref_idx_list[listIdx][i] = iRefIdx;
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx]
+ && (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending));
+ } else {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+ }
+ }
+ }
+ }
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 2; i++) {
+ int iPartIdx = i << 2;
+ int32_t iRefIdx = ref_idx_list[listIdx][i];
+ if (IS_DIR (mbType, i, listIdx)) {
+ PredInter8x16Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ 0 ][ compIdx = 0 ] (horizontal)
+ iMv[0] += iCode;
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ 0 ][ compIdx = 1 ] (vertical)
+ iMv[1] += iCode;
+ WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+ } else {
+ * (uint32_t*)iMv = 0;
+ }
+ UpdateP8x16MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+ }
+ }
+ } else if (IS_Inter_8x8 (mbType)) {
+ int8_t pSubPartCount[4], pPartW[4];
+ uint32_t uiSubMbType;
+ //sub_mb_type, partition
+ int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+ if (pCtx->sRefPic.pRefList[LIST_1][0] == NULL) {
+ SLogContext* pLogCtx = & (pCtx->sLogCtx);
+ WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
+ return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
+ }
+ bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
+ const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]);
+ bool has_direct_called = false;
+ SubMbType directSubMbType = 0;
+
+ //uiSubMbType, partition
+ for (int32_t i = 0; i < 4; i++) {
+ WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //sub_mb_type[ mbPartIdx ]
+ uiSubMbType = uiCode;
+ if (uiSubMbType >= 13) { //invalid uiSubMbType
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE);
+ }
+ pSubPartCount[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartCount;
+ pPartW[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartWidth;
+
+ // Need modification when B picture add in, reference to 7.3.5
+ if (pSubPartCount[i] > 1)
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = false;
+
+ if (IS_DIRECT (g_ksInterBSubMbTypeInfo[uiSubMbType].iType)) {
+ if (!has_direct_called) {
+ if (pSliceHeader->iDirectSpatialMvPredFlag) {
+ int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, directSubMbType);
+ if (ret != ERR_NONE) {
+ return ret;
+ }
+
+ } else {
+ //temporal direct mode
+ int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, directSubMbType);
+ if (ret != ERR_NONE) {
+ return ret;
+ }
+ }
+ has_direct_called = true;
+ }
+ pCurDqLayer->pSubMbType[iMbXy][i] = directSubMbType;
+ if (IS_SUB_4x4 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+ pSubPartCount[i] = 4;
+ pPartW[i] = 1;
+ }
+ } else {
+ pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType;
+ }
+ }
+ if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 4; i++) {
+ bool is_dir = IS_DIR (pCurDqLayer->pSubMbType[iMbXy][i], 0, listIdx) > 0;
+ if (is_dir) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+ iMotionPredFlag[listIdx][i] = uiCode;
+ }
+ }
+ }
+ }
+ for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
+ int16_t iIdx8 = i << 2;
+ if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) {
+ if (pSliceHeader->iDirectSpatialMvPredFlag) {
+ FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, bIsLongRef, pMvDirect, iRef,
+ iMvArray, NULL);
+ } else {
+ int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
+ iRef[LIST_1] = 0;
+ iRef[LIST_0] = 0;
+ const uint8_t uiColoc4Idx = g_kuiScan4[iIdx8];
+ if (!pCurDqLayer->iColocIntra[uiColoc4Idx]) {
+ iRef[LIST_0] = 0;
+ int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][uiColoc4Idx];
+ if (colocRefIndexL0 >= 0) {
+ iRef[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
+ } else {
+ mvColoc = pCurDqLayer->iColocMv[LIST_1];
+ }
+ }
+ Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_0, iRef[LIST_0]);
+ Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_1, iRef[LIST_1]);
+ FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, iRef, mvColoc, iMvArray,
+ NULL);
+ }
+ }
+ }
+ //ref no-direct
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 4; i++) {
+ int16_t iIdx8 = i << 2;
+ int32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
+ int8_t iref = REF_NOT_IN_LIST;
+ if (IS_DIRECT (subMbType)) {
+ if (pSliceHeader->iDirectSpatialMvPredFlag) {
+ Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iRef[listIdx]);
+ ref_idx_list[listIdx][i] = iRef[listIdx];
+ }
+ } else {
+ if (IS_DIR (subMbType, 0, listIdx)) {
+ if (iMotionPredFlag[listIdx][i] == 0) {
+ WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0[ mbPartIdx ]
+ iref = uiCode;
+ if ((iref < 0) || (iref >= iRefCount[listIdx]) || (ppRefPic[listIdx][iref] == NULL)) { //error ref_idx
+ pCtx->bMbRefConcealed = true;
+ if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+ iref = 0;
+ pCtx->iErrorCode |= dsBitstreamError;
+ } else {
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+ }
+ }
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iref]
+ && (ppRefPic[listIdx][iref]->bIsComplete || bIsPending));
+ } else {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+ return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+ }
+ }
+ Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iref);
+ ref_idx_list[listIdx][i] = iref;
+ }
+ }
+ }
+ //mv
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ for (int32_t i = 0; i < 4; i++) {
+ int8_t iPartCount = pSubPartCount[i];
+ int16_t iPartIdx, iBlockW = pPartW[i];
+ uint8_t uiScan4Idx, uiCacheIdx;
+
+ uiCacheIdx = g_kuiCache30ScanIdx[i << 2];
+
+ int8_t iref = ref_idx_list[listIdx][i];
+ iRefIdxArray[listIdx][uiCacheIdx] = iRefIdxArray[listIdx][uiCacheIdx + 1] =
+ iRefIdxArray[listIdx][uiCacheIdx + 6] = iRefIdxArray[listIdx][uiCacheIdx + 7] = iref;
+
+ uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
+ if (IS_DIRECT (subMbType)) {
+ continue;
+ }
+ bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0;
+ for (int32_t j = 0; j < iPartCount; j++) {
+ iPartIdx = (i << 2) + j * iBlockW;
+ uiScan4Idx = g_kuiScan4[iPartIdx];
+ uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+ if (is_dir) {
+ PredMv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iBlockW, iref, iMv);
+
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ subMbPartIdx ][ compIdx ]
+ iMv[0] += iCode;
+ WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ subMbPartIdx ][ compIdx ]
+ iMv[1] += iCode;
+ WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+ } else {
+ * (uint32_t*)iMv = 0;
+ }
+ if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 5], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx + 7], LD32 (iMv));
+ } else if (IS_SUB_8x4 (subMbType)) {
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv));
+ } else if (IS_SUB_4x8 (subMbType)) {
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv));
+ } else { //SUB_MB_TYPE_4x4 == uiSubMbType
+ ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+ ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+ }
+ }
+ }
+ }
+ }
+ return ERR_NONE;
+}
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp
index a2c5e7f08d9..475df0ac0d9 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp
@@ -106,13 +106,15 @@ PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const
pPic->iWidthInPixel = kiPicWidth;
pPic->iHeightInPixel = kiPicHeight;
pPic->iFrameNum = -1;
- pPic->bAvailableFlag = true;
+ pPic->iRefCount = 0;
uint32_t uiMbWidth = (kiPicWidth + 15) >> 4;
uint32_t uiMbHeight = (kiPicHeight + 15) >> 4;
uint32_t uiMbCount = uiMbWidth * uiMbHeight;
- pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t),
- "pPic->pMbType");
+
+ pPic->pMbCorrectlyDecodedFlag = (bool*)pMa->WelsMallocz (uiMbCount * sizeof (bool), "pPic->pMbCorrectlyDecodedFlag");
+ pPic->pNzc = GetThreadCount (pCtx) > 1 ? (int8_t (*)[24])pMa->WelsMallocz (uiMbCount * 24, "pPic->pNzc") : NULL;
+ pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), "pPic->pMbType");
pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]");
pPic->pMv[LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
@@ -121,6 +123,15 @@ PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const
int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]");
pPic->pRefIndex[LIST_1] = (int8_t (*)[16])pMa->WelsMallocz (uiMbCount * sizeof (
int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]");
+ if (pCtx->pThreadCtx != NULL) {
+ pPic->pReadyEvent = (SWelsDecEvent*)pMa->WelsMallocz (uiMbHeight * sizeof (SWelsDecEvent), "pPic->pReadyEvent");
+ for (uint32_t i = 0; i < uiMbHeight; ++i) {
+ CREATE_EVENT (&pPic->pReadyEvent[i], 1, 0, NULL);
+ }
+ } else {
+ pPic->pReadyEvent = NULL;
+ }
+
return pPic;
}
@@ -131,6 +142,16 @@ void FreePicture (PPicture pPic, CMemoryAlign* pMa) {
pPic->pBuffer[0] = NULL;
}
+ if (pPic->pMbCorrectlyDecodedFlag) {
+ pMa->WelsFree (pPic->pMbCorrectlyDecodedFlag, "pPic->pMbCorrectlyDecodedFlag");
+ pPic->pMbCorrectlyDecodedFlag = NULL;
+ }
+
+ if (pPic->pNzc) {
+ pMa->WelsFree (pPic->pNzc, "pPic->pNzc");
+ pPic->pNzc = NULL;
+ }
+
if (pPic->pMbType) {
pMa->WelsFree (pPic->pMbType, "pPic->pMbType");
pPic->pMbType = NULL;
@@ -147,6 +168,14 @@ void FreePicture (PPicture pPic, CMemoryAlign* pMa) {
pPic->pRefIndex[listIdx] = NULL;
}
}
+ if (pPic->pReadyEvent != NULL) {
+ uint32_t uiMbHeight = (pPic->iHeightInPixel + 15) >> 4;
+ for (uint32_t i = 0; i < uiMbHeight; ++i) {
+ CLOSE_EVENT (&pPic->pReadyEvent[i]);
+ }
+ pMa->WelsFree (pPic->pReadyEvent, "pPic->pReadyEvent");
+ pPic->pReadyEvent = NULL;
+ }
pMa->WelsFree (pPic, "pPic");
pPic = NULL;
}
@@ -160,25 +189,55 @@ PPicture PrefetchPic (PPicBuff pPicBuf) {
}
for (iPicIdx = pPicBuf->iCurrentIdx + 1; iPicIdx < pPicBuf->iCapacity ; ++iPicIdx) {
- if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag
- && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) {
+ if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef
+ && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) {
pPic = pPicBuf->ppPic[iPicIdx];
break;
}
}
if (pPic != NULL) {
pPicBuf->iCurrentIdx = iPicIdx;
+ pPic->iPicBuffIdx = iPicIdx;
return pPic;
}
for (iPicIdx = 0 ; iPicIdx <= pPicBuf->iCurrentIdx ; ++iPicIdx) {
- if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag
- && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) {
+ if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef
+ && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) {
pPic = pPicBuf->ppPic[iPicIdx];
break;
}
}
pPicBuf->iCurrentIdx = iPicIdx;
+ if (pPic != NULL) {
+ pPic->iPicBuffIdx = iPicIdx;
+ }
+ return pPic;
+}
+
+// Round-robin picture fetch (the name suggests it serves the multi-threaded
+// decoder -- confirm against callers).
+// Returns the picture stored at pPicBuf->iCurrentIdx, records that slot index
+// in the picture's iPicBuffIdx, then advances iCurrentIdx, wrapping to 0 once
+// it reaches iCapacity. Unlike PrefetchPic(), no bUsedAsRef / iRefCount test is
+// applied to the chosen slot.
+// Returns NULL when the buffer has zero capacity or the current slot is empty.
+PPicture PrefetchPicForThread (PPicBuff pPicBuf) {
+  if (pPicBuf->iCapacity == 0) {
+    return NULL;
+  }
+
+  PPicture pPic = pPicBuf->ppPic[pPicBuf->iCurrentIdx];
+  // PrefetchPic() treats ppPic[] entries as possibly NULL; apply the same
+  // guard here instead of dereferencing the slot unconditionally.
+  if (pPic != NULL) {
+    pPic->iPicBuffIdx = pPicBuf->iCurrentIdx;
+  }
+  // Advance the cursor even for an empty slot so the next call inspects a
+  // different entry rather than returning NULL for the same slot forever.
+  if (++pPicBuf->iCurrentIdx >= pPicBuf->iCapacity) {
+    pPicBuf->iCurrentIdx = 0;
+  }
+  return pPic;
+}
+
+// Looks up the picture stored in slot iLastPicBuffIdx without moving the
+// buffer's cursor. Yields NULL when the buffer has zero capacity or when the
+// index lies outside [0, iCapacity).
+PPicture PrefetchLastPicForThread (PPicBuff pPicBuf, const int32_t& iLastPicBuffIdx) {
+  PPicture pPic = NULL;
+
+  const bool bHasSlots = (pPicBuf->iCapacity != 0);
+  if (bHasSlots) {
+    const bool bIdxInRange = ! (iLastPicBuffIdx < 0 || iLastPicBuffIdx >= pPicBuf->iCapacity);
+    if (bIdxInRange) {
+      pPic = pPicBuf->ppPic[iLastPicBuffIdx];
+    }
+  }
return pPic;
}
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp
index 157fb4cdb6b..9034cc4d7da 100644
--- a/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp
@@ -44,20 +44,20 @@
namespace WelsDec {
-void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer) {
+void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer) {
PPicture pCurPic = pCtx->pDec;
int32_t iLumaStride = pCurPic->iLinesize[0];
int32_t iChromaStride = pCurPic->iLinesize[1];
- int32_t iMbX = pCurLayer->iMbX;
- int32_t iMbY = pCurLayer->iMbY;
+ int32_t iMbX = pCurDqLayer->iMbX;
+ int32_t iMbY = pCurDqLayer->iMbY;
- pCurLayer->iLumaStride = iLumaStride;
- pCurLayer->iChromaStride = iChromaStride;
+ pCurDqLayer->iLumaStride = iLumaStride;
+ pCurDqLayer->iChromaStride = iChromaStride;
if (bOutput) {
- pCurLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
- pCurLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
- pCurLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+ pCurDqLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
+ pCurDqLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+ pCurDqLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
}
}
@@ -214,11 +214,10 @@ int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLe
//according to current 8*8 block ref_index to gain reference picture
-static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, int8_t* pRefIdxList,
- int32_t iIndex, int32_t listIdx) {
+static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, const int8_t& iRefIdx,
+ int32_t listIdx) {
PPicture pRefPic;
- int8_t iRefIdx = pRefIdxList[iIndex];
if (iRefIdx >= 0) {
pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx];
@@ -229,7 +228,9 @@ static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pC
pMCRefMem->pSrcY = pRefPic->pData[0];
pMCRefMem->pSrcU = pRefPic->pData[1];
pMCRefMem->pSrcV = pRefPic->pData[2];
-
+ if (!pMCRefMem->pSrcY || !pMCRefMem->pSrcU || !pMCRefMem->pSrcV) {
+ return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
+ }
return ERR_NONE;
}
}
@@ -240,7 +241,9 @@ static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pC
#ifndef MC_FLOW_SIMPLE_JUDGE
#define MC_FLOW_SIMPLE_JUDGE 1
#endif //MC_FLOW_SIMPLE_JUDGE
-void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
+void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx,
+ int32_t iXOffset, int32_t iYOffset,
+ SMcFunc* pMCFunc,
int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]) {
int32_t iFullMVx = (iXOffset << 2) + iMVs[0]; //quarter pixel
int32_t iFullMVy = (iYOffset << 2) + iMVs[1];
@@ -249,6 +252,27 @@ void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFun
iFullMVy = WELS_CLIP3 (iFullMVy, ((-PADDING_LENGTH + 2) * (1 << 2)),
((pMCRefMem->iPicHeight + PADDING_LENGTH - 19) * (1 << 2)));
+ if (GetThreadCount (pCtx) > 1 && iRefIdx >= 0) {
+ // wait for the lines of reference macroblock (3 + 16).
+ PPicture pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx];
+ if (pCtx->bNewSeqBegin && (pCtx->iErrorCode & dsRefLost)) {
+ //set event if refpic is lost to prevent from infinite waiting.
+ if (!pRefPic->pReadyEvent[0].isSignaled) {
+ for (uint32_t ln = 0; ln < pCtx->sMb.iMbHeight; ++ln) {
+ SET_EVENT (&pRefPic->pReadyEvent[ln]);
+ }
+ }
+ }
+ int32_t offset = (iFullMVy >> 2) + iBlkHeight + 3 + 16;
+ if (offset > pCtx->lastReadyHeightOffset[listIdx][iRefIdx]) {
+ const int32_t down_line = WELS_MIN (offset >> 4, int32_t (pCtx->sMb.iMbHeight) - 1);
+ if (pRefPic->pReadyEvent[down_line].isSignaled != 1) {
+ WAIT_EVENT (&pRefPic->pReadyEvent[down_line], WELS_DEC_THREAD_WAIT_INFINITE);
+ }
+ pCtx->lastReadyHeightOffset[listIdx][iRefIdx] = offset;
+ }
+ }
+
int32_t iSrcPixOffsetLuma = (iFullMVx >> 2) + (iFullMVy >> 2) * pMCRefMem->iSrcLineLuma;
int32_t iSrcPixOffsetChroma = (iFullMVx >> 3) + (iFullMVy >> 3) * pMCRefMem->iSrcLineChroma;
@@ -435,7 +459,7 @@ static void BiPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, sMCRefM
}
}
-void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx) {
+int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx) {
sMCRefMember pMCRefMem;
PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
SMcFunc* pMCFunc = &pCtx->sMcFunc;
@@ -444,7 +468,7 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
int16_t iMVs[2] = {0};
- uint32_t iMBType = pCurDqLayer->pMbType[iMBXY];
+ uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY];
int32_t iMBOffsetX = pCurDqLayer->iMbX << 4;
int32_t iMBOffsetY = pCurDqLayer->iMbY << 4;
@@ -464,65 +488,66 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
pMCRefMem.iDstLineLuma = iDstLineLuma;
pMCRefMem.iDstLineChroma = iDstLineChroma;
- int32_t iRefIndex = 0;
+ int8_t iRefIndex = 0;
switch (iMBType) {
case MB_TYPE_SKIP:
case MB_TYPE_16x16:
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
- GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0);
- BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
- iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 16);
}
break;
case MB_TYPE_16x8:
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
- GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0);
- BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
- iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8);
}
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][8][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][8][1];
- GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 8, LIST_0);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][8][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][8][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][8];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
pMCRefMem.pDstY = pPredY + (iDstLineLuma << 3);
pMCRefMem.pDstU = pPredCb + (iDstLineChroma << 2);
pMCRefMem.pDstV = pPredCr + (iDstLineChroma << 2);
- BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs);
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
- iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][8];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8);
}
break;
case MB_TYPE_8x16:
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
- GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0);
- BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
- iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16);
}
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][2][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][2][1];
- GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 2, LIST_0);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][2][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][2][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][2];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
pMCRefMem.pDstY = pPredY + 8;
pMCRefMem.pDstU = pPredCb + 4;
pMCRefMem.pDstV = pPredCr + 4;
- BaseMC (&pMCRefMem, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs);
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
- iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][2];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16);
}
break;
@@ -539,9 +564,8 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
iYOffset = iMBOffsetY + iBlk8Y;
iIIdx = ((i >> 1) << 3) + ((i & 1) << 1);
- GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], iIIdx, LIST_0);
- iRefIndex = pCurDqLayer->bUseWeightPredictionFlag ? pCurDqLayer->pRefIndex[0][iMBXY][iIIdx] : 0;
-
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][iIIdx];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
pDstY = pPredY + iBlk8X + iBlk8Y * iDstLineLuma;
pDstU = pPredCb + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
pDstV = pPredCr + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
@@ -550,9 +574,9 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
pMCRefMem.pDstV = pDstV;
switch (iSubMBType) {
case SUB_MB_TYPE_8x8:
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 8);
@@ -560,21 +584,21 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
break;
case SUB_MB_TYPE_8x4:
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4);
}
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][1];
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 4][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 4][1];
pMCRefMem.pDstY += (iDstLineLuma << 2);
pMCRefMem.pDstU += (iDstLineChroma << 1);
pMCRefMem.pDstV += (iDstLineChroma << 1);
- BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4);
@@ -582,21 +606,21 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
break;
case SUB_MB_TYPE_4x8:
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8);
}
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][1];
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][1];
pMCRefMem.pDstY += 4;
pMCRefMem.pDstU += 2;
pMCRefMem.pDstV += 2;
- BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8);
@@ -616,9 +640,9 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
pMCRefMem.pDstU = pDstU + iUVLineStride;
pMCRefMem.pDstV = pDstV + iUVLineStride;
- iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][0];
- iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][1];
- BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 4);
@@ -636,6 +660,7 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec
default:
break;
}
+ return ERR_NONE;
}
int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx) {
@@ -649,7 +674,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
int16_t iMVs[2] = { 0 };
- uint32_t iMBType = pCurDqLayer->pMbType[iMBXY];
+ uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY];
int32_t iMBOffsetX = pCurDqLayer->iMbX << 4;
int32_t iMBOffsetY = pCurDqLayer->iMbY << 4;
@@ -674,37 +699,38 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pTempMCRefMem.pDstV = pTempPredYCbCr[2];
- int32_t iRefIndex1 = 0;
- int32_t iRefIndex2 = 0;
+ int8_t iRefIndex0 = 0;
+ int8_t iRefIndex1 = 0;
+ int8_t iRefIndex = 0;
bool bWeightedBipredIdcIs1 = pCurDqLayer->sLayerInfo.pPps->uiWeightedBipredIdc == 1;
if (IS_INTER_16x16 (iMBType)) {
if (IS_TYPE_L0 (iMBType) && IS_TYPE_L1 (iMBType)) {
- iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][0][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][0][1];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], 0, LIST_0));
- BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
-
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][0][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][0][1];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], 0, LIST_1));
- BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
- iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][0];
- iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][0];
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][1];
+ iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][0];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0));
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
+
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][1];
+ iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][0];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 16);
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 16, 16);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 16);
}
} else {
int32_t listIdx = (iMBType & MB_TYPE_P0L0) ? LIST_0 : LIST_1;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][0][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][0][1];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], 0, listIdx));
- BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][0];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
if (bWeightedBipredIdcIs1) {
- int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 16, 16);
}
}
@@ -716,29 +742,31 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (IS_DIR (iMBType, i, listIdx)) {
lastListIdx = listIdx;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][1];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iPartIdx, listIdx));
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iPartIdx];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
if (i) {
pMCRefMem.pDstY += (iDstLineLuma << 3);
pMCRefMem.pDstU += (iDstLineChroma << 2);
pMCRefMem.pDstV += (iDstLineChroma << 2);
}
- BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
if (++listCount == 2) {
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][1];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iPartIdx, LIST_1));
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][1];
+ iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
if (i) {
pTempMCRefMem.pDstY += (iDstLineLuma << 3);
pTempMCRefMem.pDstU += (iDstLineChroma << 2);
pTempMCRefMem.pDstV += (iDstLineChroma << 2);
}
- BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iPartIdx];
- iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iPartIdx];
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 8);
+ iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iPartIdx];
+ iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx];
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 16, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 8);
}
@@ -747,7 +775,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
}
if (listCount == 1) {
if (bWeightedBipredIdcIs1) {
- int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][iPartIdx];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][iPartIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 16, 8);
}
}
@@ -759,29 +787,31 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (IS_DIR (iMBType, i, listIdx)) {
lastListIdx = listIdx;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][1];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], i << 1, listIdx));
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][i << 1];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
if (i) {
pMCRefMem.pDstY += 8;
pMCRefMem.pDstU += 4;
pMCRefMem.pDstV += 4;
}
- BaseMC (&pMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (++listCount == 2) {
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][1];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], i << 1, LIST_1));
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][1];
+ iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
if (i) {
pTempMCRefMem.pDstY += 8;
pTempMCRefMem.pDstU += 4;
pTempMCRefMem.pDstV += 4;
}
- BaseMC (&pTempMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][i << 1];
- iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][i << 1];
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 16);
+ iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][i << 1];
+ iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1];
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 16);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 16);
}
@@ -790,7 +820,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
}
if (listCount == 1) {
if (bWeightedBipredIdcIs1) {
- int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][i << 1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][i << 1];
WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 8, 16);
}
}
@@ -827,53 +857,53 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pTempMCRefMem.pDstV = pDstV2;
if ((IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType))) {
- iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iIIdx];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], iIIdx, LIST_0));
+ iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iIIdx];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0));
- iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iIIdx];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iIIdx, LIST_1));
+ iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iIIdx];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
} else {
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
- iRefIndex1 = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
- WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iIIdx, listIdx));
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
+ WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
}
if (IS_SUB_8x8 (iSubMBType)) {
if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) {
- iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
- BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 8);
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 8);
}
} else {
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
if (bWeightedBipredIdcIs1) {
- int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 8);
}
}
} else if (IS_SUB_8x4 (iSubMBType)) {
if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_8x4
- iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
- BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4);
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4);
}
@@ -881,49 +911,49 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstY += (iDstLineLuma << 2);
pMCRefMem.pDstU += (iDstLineChroma << 1);
pMCRefMem.pDstV += (iDstLineChroma << 1);
- iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
pTempMCRefMem.pDstY += (iDstLineLuma << 2);
pTempMCRefMem.pDstU += (iDstLineChroma << 1);
pTempMCRefMem.pDstV += (iDstLineChroma << 1);
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][1];
- BaseMC (&pTempMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][1];
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4);
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4);
}
} else { //B_L0_8x4 B_L1_8x4
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
pMCRefMem.pDstY += (iDstLineLuma << 2);
pMCRefMem.pDstU += (iDstLineChroma << 1);
pMCRefMem.pDstV += (iDstLineChroma << 1);
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][1];
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
if (bWeightedBipredIdcIs1) {
- int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 4);
}
}
} else if (IS_SUB_4x8 (iSubMBType)) {
if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_4x8
- iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
- BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1];
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8);
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8);
}
@@ -931,35 +961,35 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstY += 4;
pMCRefMem.pDstU += 2;
pMCRefMem.pDstV += 2;
- iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][1];
- BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
pTempMCRefMem.pDstY += 4;
pTempMCRefMem.pDstU += 2;
pTempMCRefMem.pDstV += 2;
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][1];
- BaseMC (&pTempMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][1];
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8);
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8);
}
} else { //B_L0_4x8 B_L1_4x8
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
- BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
pMCRefMem.pDstY += 4;
pMCRefMem.pDstU += 2;
pMCRefMem.pDstV += 2;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][1];
- BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][1];
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
if (bWeightedBipredIdcIs1) {
- int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 8);
}
}
@@ -977,27 +1007,27 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstU = pDstU + iUVLineStride;
pMCRefMem.pDstV = pDstV + iUVLineStride;
- iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1];
- BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1];
+ BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
pTempMCRefMem.pDstY = pDstY2 + iBlk8X + iBlk8Y * iDstLineLuma;
pTempMCRefMem.pDstU = pDstU2 + iUVLineStride;
pTempMCRefMem.pDstV = pDstV2 + iUVLineStride;;
- iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0];
- iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1];
- BaseMC (&pTempMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1];
+ BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
- BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 4);
+ BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 4);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 4);
}
}
} else {
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
- int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
+ iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
for (int32_t j = 0; j < 4; j++) {
int32_t iUVLineStride;
iJIdx = ((j >> 1) << 2) + (j & 1);
@@ -1010,9 +1040,9 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstU = pDstU + iUVLineStride;
pMCRefMem.pDstV = pDstV + iUVLineStride;
- iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][0];
- iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][1];
- BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+ iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][0];
+ iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][1];
+ BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
if (bWeightedBipredIdcIs1) {
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 4);
}
diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp
new file mode 100644
index 00000000000..d05aa4515b3
--- /dev/null
+++ b/chromium/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp
@@ -0,0 +1,311 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2019, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file wels_decoder_thread.cpp
+ *
+ * \brief Interfaces introduced in thread programming
+ *
+ * \date 08/06/2018 Created
+ *
+ *************************************************************************************
+ */
+
+
+#ifdef __linux__
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <sched.h>
+#elif !defined(_WIN32) && !defined(__CYGWIN__)
+#include <sys/types.h>
+#include <sys/param.h>
+#include <unistd.h>
+#ifndef __Fuchsia__
+#include <sys/sysctl.h>
+#endif
+#ifdef __APPLE__
+#define HW_NCPU_NAME "hw.logicalcpu"
+#else
+#define HW_NCPU_NAME "hw.ncpu"
+#endif
+#endif
+
+#include "wels_decoder_thread.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+// Returns the logical processor count reported by WelsQueryLogicalProcessInfo.
+// The count field is seeded with 1 before the query is issued.
+int32_t GetCPUCount() {
+  WelsLogicalProcessInfo sProcInfo;
+  sProcInfo.ProcessorCount = 1;
+  WelsQueryLogicalProcessInfo (&sProcInfo);
+  const int32_t kiProcessorCount = sProcInfo.ProcessorCount;
+  return kiProcessorCount;
+}
+
+// Hands the routine tf and its argument ta to WelsThreadCreate using a zero
+// thread attribute; the new handle is stored in t->h.
+// Returns the status reported by WelsThreadCreate.
+int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta) {
+  WELS_THREAD_ATTR sDefaultAttr = 0;
+  const int iStatus = WelsThreadCreate (& (t->h), tf, ta, sDefaultAttr);
+  return iStatus;
+}
+
+// Joins the thread whose handle is stored in t->h.
+// Returns the status reported by WelsThreadJoin.
+int ThreadWait (SWelsDecThread* t) {
+  const int iJoinStatus = WelsThreadJoin (t->h);
+  return iJoinStatus;
+}
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+
+// Windows: creates the underlying event object and mirrors its initial state
+// in e->isSignaled. Returns 0 on success, 1 when CreateEvent yields NULL.
+int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) {
+  e->h = CreateEvent (NULL, manualReset, initialState, NULL);
+  e->isSignaled = initialState;
+  if (NULL == e->h) {
+    return 1;
+  }
+  return 0;
+}
+
+void EventReset (SWelsDecEvent* e) { // Windows: puts the event back into the non-signaled state
+ ResetEvent (e->h);
+ e->isSignaled = 0; // cached copy of the state, updated after the handle itself
+}
+
+void EventPost (SWelsDecEvent* e) { // Windows: signals the event handle
+ SetEvent (e->h);
+ e->isSignaled = 1; // cached flag only; EventWait on this platform never updates it
+}
+
+// Windows: blocks on the event handle for up to `timeout` milliseconds.
+// A negative timeout or WELS_DEC_THREAD_WAIT_INFINITE waits without limit.
+// Returns WELS_DEC_THREAD_WAIT_SIGNALED when the handle was signaled and
+// WELS_DEC_THREAD_WAIT_TIMEDOUT otherwise, i.e. the same codes that SemWait
+// and the pthread implementation of EventWait report.
+int EventWait (SWelsDecEvent* e, int32_t timeout) {
+  DWORD result;
+  if ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0)
+    result = WaitForSingleObject (e->h, INFINITE);
+  else
+    result = WaitForSingleObject (e->h, timeout);
+
+  if (result == WAIT_OBJECT_0)
+    return WELS_DEC_THREAD_WAIT_SIGNALED;
+  // This path used to return the raw Win32 WAIT_TIMEOUT value, which is not
+  // one of the WELS_DEC_THREAD_WAIT_* codes used by every other wait routine.
+  return WELS_DEC_THREAD_WAIT_TIMEDOUT;
+}
+
+void EventDestroy (SWelsDecEvent* e) { // Windows: closes the event handle
+ CloseHandle (e->h);
+ e->h = NULL; // drop the stale handle value once it has been closed
+}
+
+// Windows: creates a semaphore object with the given initial and maximum
+// counts. Returns 0 on success, 1 when CreateSemaphore yields NULL.
+int SemCreate (SWelsDecSemphore* s, long value, long max) {
+  s->h = CreateSemaphore (NULL, value, max, NULL);
+  if (NULL == s->h) {
+    return 1;
+  }
+  return 0;
+}
+
+// Windows: waits on the semaphore handle for up to `timeout` milliseconds.
+// A negative timeout or WELS_DEC_THREAD_WAIT_INFINITE waits without limit.
+// Returns WELS_DEC_THREAD_WAIT_SIGNALED when the wait was satisfied and
+// WELS_DEC_THREAD_WAIT_TIMEDOUT for every other outcome.
+int SemWait (SWelsDecSemphore* s, int32_t timeout) {
+  const bool bWaitForever = (timeout < 0) || ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE);
+  const DWORD dwWaitMs = bWaitForever ? INFINITE : (DWORD)timeout;
+  const DWORD dwResult = WaitForSingleObject (s->h, dwWaitMs);
+
+  if (dwResult != WAIT_OBJECT_0) {
+    return WELS_DEC_THREAD_WAIT_TIMEDOUT;
+  }
+  return WELS_DEC_THREAD_WAIT_SIGNALED;
+}
+
+void SemRelease (SWelsDecSemphore* s, long* prevcount) { // Windows: raises the semaphore count by one
+ ReleaseSemaphore (s->h, 1, prevcount); // previous count is written through prevcount; the BOOL result is ignored
+}
+
+void SemDestroy (SWelsDecSemphore* s) { // Windows: closes the semaphore handle
+ CloseHandle (s->h);
+ s->h = NULL; // drop the stale handle value once it has been closed
+}
+
+#else /* _WIN32 */
+
+// Converts a relative timeout in milliseconds into an absolute deadline
+// (current gettimeofday time + timeout) stored in *ts, the form taken by
+// pthread_cond_timedwait and sem_timedwait.
+//   ts      - receives the deadline; tv_nsec is normalised to [0, 1e9).
+//   timeout - relative timeout in milliseconds; callers pass positive values.
+static void getTimespecFromTimeout (struct timespec* ts, int32_t timeout) {
+  const long kNsecPerSec = 1000000000L;
+  struct timeval tv;
+  gettimeofday (&tv, 0);
+  // Split the timeout into whole seconds and leftover milliseconds before
+  // scaling: "timeout * 1000000" is evaluated in int and overflows for
+  // timeouts above ~2147 ms. With the split, tv_nsec stays below 2e9 and
+  // therefore also fits a 32-bit long.
+  ts->tv_nsec = tv.tv_usec * 1000 + (long) (timeout % 1000) * 1000000L;
+  ts->tv_sec = tv.tv_sec + timeout / 1000 + ts->tv_nsec / kNsecPerSec;
+  ts->tv_nsec %= kNsecPerSec;
+}
+// POSIX: initialises the mutex/condition pair backing the event and records
+// its initial state and reset mode.
+//   manualReset  - non-zero keeps the event signaled after a successful wait.
+//   initialState - non-zero creates the event already signaled.
+// Returns 0 on success, 1 if the mutex cannot be initialised, 2 if the
+// condition variable cannot be initialised.
+int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) {
+  if (pthread_mutex_init (& (e->m), NULL))
+    return 1;
+  if (pthread_cond_init (& (e->c), NULL)) {
+    // Do not leave the freshly initialised mutex behind on failure.
+    pthread_mutex_destroy (& (e->m));
+    return 2;
+  }
+
+  e->isSignaled = initialState;
+  e->manualReset = manualReset;
+
+  return 0;
+}
+
+void EventReset (SWelsDecEvent* e) { // POSIX: clears the signaled flag while holding the event mutex
+ pthread_mutex_lock (& (e->m));
+ e->isSignaled = 0;
+ pthread_mutex_unlock (& (e->m));
+}
+
+void EventPost (SWelsDecEvent* e) { // POSIX: marks the event signaled and wakes every thread blocked in EventWait
+ pthread_mutex_lock (& (e->m));
+ pthread_cond_broadcast (& (e->c)); // issued while e->m is held, so woken waiters resume only after the unlock below
+ e->isSignaled = 1;
+ pthread_mutex_unlock (& (e->m));
+}
+
+int EventWait (SWelsDecEvent* e, int32_t timeout) { // POSIX: waits up to `timeout` ms; returns WELS_DEC_THREAD_WAIT_SIGNALED or WELS_DEC_THREAD_WAIT_TIMEDOUT
+ pthread_mutex_lock (& (e->m));
+ int signaled = e->isSignaled; // snapshot taken under the mutex
+ if (timeout == 0) { // zero timeout: report the current state without blocking; the flag is left untouched
+ pthread_mutex_unlock (& (e->m));
+ if (signaled)
+ return WELS_DEC_THREAD_WAIT_SIGNALED;
+ else
+ return WELS_DEC_THREAD_WAIT_TIMEDOUT;
+ }
+ if (signaled) { // already signaled: return at once, consuming the signal unless the event is manual-reset
+ if (!e->manualReset) {
+ e->isSignaled = 0;
+ }
+ pthread_mutex_unlock (& (e->m));
+ return WELS_DEC_THREAD_WAIT_SIGNALED;
+ }
+ int rc = 0;
+ if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) { // negative values are treated as an unbounded wait too
+ rc = pthread_cond_wait (& (e->c), & (e->m)); // NOTE(review): single wait, isSignaled is not re-checked afterwards, so a spurious wakeup would be reported as signaled
+ } else {
+ struct timespec ts;
+ getTimespecFromTimeout (&ts, timeout); // absolute deadline = now + timeout ms
+ rc = pthread_cond_timedwait (& (e->c), & (e->m), &ts);
+ }
+ if (!e->manualReset) { // NOTE(review): also runs when the wait timed out - confirm clearing the flag on that path is intended
+ e->isSignaled = 0;
+ }
+ pthread_mutex_unlock (& (e->m));
+ if (rc == 0) // the result is derived from the wait's return code only
+ return WELS_DEC_THREAD_WAIT_SIGNALED;
+ else
+ return WELS_DEC_THREAD_WAIT_TIMEDOUT;
+}
+
+void EventDestroy (SWelsDecEvent* e) { // POSIX: releases the mutex and condition variable set up by EventCreate
+ pthread_mutex_destroy (& (e->m));
+ pthread_cond_destroy (& (e->c)); // return codes of both destroy calls are ignored
+}
+
+// POSIX: initialises the counter, its upper bound, the mutex and the wake-up
+// primitive opened through WelsEventOpen.
+//   value - initial count stored in s->v.
+//   max   - upper bound stored in s->max (enforced by SemRelease).
+// Returns 0 on success, 1 if the mutex cannot be initialised, 2 if
+// WelsEventOpen fails.
+int SemCreate (SWelsDecSemphore* s, long value, long max) {
+  s->v = value;
+  s->max = max;
+  if (pthread_mutex_init (& (s->m), NULL))
+    return 1;
+  const char* event_name = "";
+  if (WelsEventOpen (& (s->e), event_name)) {
+    // Do not leave the freshly initialised mutex behind on failure.
+    pthread_mutex_destroy (& (s->m));
+    return 2;
+  }
+  return 0;
+}
+
+int SemWait (SWelsDecSemphore* s, int32_t timeout) { // POSIX: takes one unit from the counter s->v, waiting up to `timeout` ms; returns WELS_DEC_THREAD_WAIT_SIGNALED or WELS_DEC_THREAD_WAIT_TIMEDOUT
+#if defined(__APPLE__)
+ pthread_mutex_lock (& (s->m)); // NOTE(review): only the Apple build takes s->m; other builds read and write s->v without it - confirm this is safe
+#endif
+ int rc = 0;
+ if (timeout != 0) {
+ while ((s->v) == 0) { // block only while the counter is empty
+ if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) {
+ // unbounded wait: this branch never breaks, the loop ends only once s->v is non-zero
+#if defined(__APPLE__)
+ rc = pthread_cond_wait (& (s->e), & (s->m));
+#else
+ rc = sem_wait (s->e);
+ if (rc != 0) rc = errno; // turn the -1 failure result into the errno code
+#endif
+ } else {
+ struct timespec ts;
+ getTimespecFromTimeout (&ts, timeout); // deadline is recomputed from the full timeout on every pass
+#if defined(__APPLE__)
+ rc = pthread_cond_timedwait (& (s->e), & (s->m), &ts);
+#else
+ rc = sem_timedwait (s->e, &ts);
+ if (rc != 0) rc = errno;
+#endif
+ if (rc != EINTR) {
+ // any result other than EINTR (success, timeout or another error) ends the wait
+ break;
+ }
+ }
+ }
+ // decrement only when the last wait succeeded, or when no wait was needed (rc still 0)
+ if (rc == 0)
+ s->v -= 1;
+
+ } else {
+ // timeout of 0: try to take a unit without blocking
+ if (s->v > 0) {
+ s->v -= 1;
+ rc = 0;
+ } else {
+ rc = 1;
+ }
+ }
+#if defined(__APPLE__)
+ pthread_mutex_unlock (& (s->m));
+#endif
+ // map rc to the public wait codes: 0 is signaled, anything else is reported as timed out
+ if (rc == 0)
+ return WELS_DEC_THREAD_WAIT_SIGNALED;
+ else
+ return WELS_DEC_THREAD_WAIT_TIMEDOUT;
+}
+
+void SemRelease (SWelsDecSemphore* s, long* o_pPrevCount) { // POSIX: returns one unit to the counter (capped at s->max) and wakes a waiter
+ long prevcount;
+#ifdef __APPLE__
+ pthread_mutex_lock (& (s->m));
+ prevcount = s->v;
+ if (s->v < s->max)
+ s->v += 1;
+ pthread_cond_signal (& (s->e)); // signalled even when the counter was already at its maximum
+ pthread_mutex_unlock (& (s->m));
+#else
+ prevcount = s->v; // NOTE(review): s->v is updated here without holding s->m - confirm callers cannot race
+ if (s->v < s->max)
+ s->v += 1;
+ sem_post (s->e); // posted even when the counter was already at its maximum
+#endif
+ if (o_pPrevCount != NULL) { // the previous count is optional output
+ *o_pPrevCount = prevcount;
+ }
+}
+
+void SemDestroy (SWelsDecSemphore* s) { // POSIX: releases the mutex and the primitive opened in SemCreate
+ pthread_mutex_destroy (& (s->m));
+ const char* event_name = ""; // same empty name that SemCreate passed to WelsEventOpen
+ WelsEventClose (& (s->e), event_name);
+}
+
+#endif /* !_WIN32 */
+
diff --git a/chromium/third_party/openh264/src/codec/decoder/meson.build b/chromium/third_party/openh264/src/codec/decoder/meson.build
index f93837eaf1a..1131022ffe4 100644
--- a/chromium/third_party/openh264/src/codec/decoder/meson.build
+++ b/chromium/third_party/openh264/src/codec/decoder/meson.build
@@ -19,15 +19,30 @@ cpp_sources = [
'core/src/pic_queue.cpp',
'core/src/rec_mb.cpp',
'plus/src/welsDecoderExt.cpp',
+ 'core/src/wels_decoder_thread.cpp',
]
-asm_sources = [
- 'core/x86/dct.asm',
- 'core/x86/intra_pred.asm',
-]
-
-objs_asm = asm_gen.process(asm_sources)
+objs_asm = []
+if ['x86', 'x86_64'].contains(cpu_family)
+ asm_sources = [
+ 'core/x86/dct.asm',
+ 'core/x86/intra_pred.asm',
+ ]
+ objs_asm = asm_gen.process(asm_sources)
+elif cpu_family == 'arm'
+ cpp_sources += [
+ 'core/arm/block_add_neon.S',
+ 'core/arm/intra_pred_neon.S',
+ ]
+elif cpu_family == 'aarch64'
+ cpp_sources += [
+ 'core/arm64/block_add_aarch64_neon.S',
+ 'core/arm64/intra_pred_aarch64_neon.S',
+ ]
+else
+ error('Unsupported cpu family @0@'.format(cpu_family))
+endif
libdecoder = static_library('decoder', cpp_sources, objs_asm,
- include_directories: [inc, decoder_inc],
+ include_directories: [inc, decoder_inc, casm_inc],
dependencies: deps)
diff --git a/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h b/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h
index 6ed73acc119..cfacbc83f59 100644
--- a/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h
+++ b/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h
@@ -109,32 +109,53 @@ class CWelsDecoder : public ISVCDecoder {
virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption);
virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption);
- typedef struct tagPictInfo {
- SBufferInfo sBufferInfo;
- int32_t iPOC;
- int32_t iFrameNum;
- bool bLastGOP;
- unsigned char* pData[3];
- } SPictInfo, *PPictInfo;
+ public:
+ DECODING_STATE DecodeFrame2WithCtx (PWelsDecoderContext pCtx, const unsigned char* kpSrc, const int kiSrcLen,
+ unsigned char** ppDst, SBufferInfo* pDstInfo);
+ DECODING_STATE ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx);
private:
- PWelsDecoderContext m_pDecContext;
welsCodecTrace* m_pWelsTrace;
+ uint32_t m_uiDecodeTimeStamp;
+ bool m_bIsBaseline;
+ int32_t m_iCpuCount;
+ int32_t m_iThreadCount;
+ int32_t m_iCtxCount;
+ PPicBuff m_pPicBuff;
+ bool m_bParamSetsLostFlag;
+ bool m_bFreezeOutput;
+ int32_t m_DecCtxActiveCount;
+ PWelsDecoderThreadCTX m_pDecThrCtx;
+ PWelsDecoderThreadCTX m_pLastDecThrCtx;
+ int32_t m_iLastBufferedIdx;
+ WELS_MUTEX m_csDecoder;
+ SWelsDecEvent m_sBufferingEvent;
+ SWelsDecEvent m_sReleaseBufferEvent;
+ SWelsDecSemphore m_sIsBusy;
SPictInfo m_sPictInfoList[16];
- int32_t m_iPictInfoIndex;
- int32_t m_iMinPOC;
- int32_t m_iNumOfPicts;
- int32_t m_iLastGOPRemainPicts;
- int32_t m_LastWrittenPOC;
- int32_t m_iLargestBufferedPicIndex;
+ SPictReoderingStatus m_sReoderingStatus;
+ PWelsDecoderThreadCTX m_pDecThrCtxActive[WELS_DEC_MAX_NUM_CPU];
+ SVlcTable m_sVlcTable;
+ SWelsLastDecPicInfo m_sLastDecPicInfo;
+ SDecoderStatistics m_sDecoderStatistics;// For real time debugging
+ private:
int32_t InitDecoder (const SDecodingParam* pParam);
void UninitDecoder (void);
- int32_t ResetDecoder();
+ int32_t InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam);
+ void UninitDecoderCtx (PWelsDecoderContext& pCtx);
+ int32_t ResetDecoder (PWelsDecoderContext& pCtx);
+ int32_t ThreadResetDecoder (PWelsDecoderContext& pCtx);
void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics);
- DECODING_STATE ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo);
-
+ DECODING_STATE ReorderPicturesInDisplay (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
+ int ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst,
+ SBufferInfo* pDstInfo);
+ void BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
+ void ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
+
+ void OpenDecoderThreads();
+ void CloseDecoderThreads();
#ifdef OUTPUT_BIT_STREAM
WelsFileHandle* m_pFBS;
WelsFileHandle* m_pFBSSize;
diff --git a/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp b/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp
index 0dbe117a030..85a10600886 100644
--- a/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp
+++ b/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp
@@ -51,6 +51,7 @@
//#include "macros.h"
#include "decoder.h"
#include "decoder_core.h"
+#include "manage_dec_ref.h"
#include "error_concealment.h"
#include "measure_time.h"
@@ -67,14 +68,11 @@ extern "C" {
#include <stdio.h>
#include <stdarg.h>
#include <sys/types.h>
+#include <malloc.h>
#else
#include <sys/time.h>
#endif
-#define _PICTURE_REORDERING_ 1
-
-static int32_t sIMinInt32 = -0x7FFFFFFF;
-
namespace WelsDec {
//////////////////////////////////////////////////////////////////////
@@ -90,15 +88,62 @@ namespace WelsDec {
*
* return: none
***************************************************************************/
+DECLARE_PROCTHREAD (pThrProcInit, p) { // thread entry trampoline: forwards p to the routine stored in pThrProcMain
+ SWelsDecThreadInfo* sThreadInfo = (SWelsDecThreadInfo*)p; // NOTE(review): OpenDecoderThreads passes a SWelsDecoderThreadCTX*; this cast presumably relies on sThreadInfo being its first member - confirm
+#if defined(WIN32)
+ _alloca (WELS_DEC_MAX_THREAD_STACK_SIZE * (sThreadInfo->uiThrNum + 1)); // result unused; presumably staggers each thread's stack by a per-thread amount - TODO confirm
+#endif
+ return sThreadInfo->pThrProcMain (p); // the original, uncast pointer is handed on
+}
+
+static DECODING_STATE ConstructAccessUnit (CWelsDecoder* pWelsDecoder, PWelsDecoderThreadCTX pThrCtx) { // runs DecodeFrame2WithCtx on this thread context and returns the accumulated state bits
+ int iRet = dsErrorFree;
+ //WelsMutexLock (&pWelsDecoder->m_csDecoder);
+ if (pThrCtx->pCtx->pLastThreadCtx != NULL) { // a previous thread context is linked: wait for its sSliceDecodeStart event, then re-arm it
+ PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pThrCtx->pCtx->pLastThreadCtx);
+ WAIT_EVENT (&pLastThreadCtx->sSliceDecodeStart, WELS_DEC_THREAD_WAIT_INFINITE);
+ RESET_EVENT (&pLastThreadCtx->sSliceDecodeStart);
+ }
+ pThrCtx->pDec = NULL;
+ if (GetThreadCount (pThrCtx->pCtx) > 1) { // with more than one thread, clear this context's finish event before decoding
+ RESET_EVENT (&pThrCtx->sSliceDecodeFinish);
+ }
+ iRet |= pWelsDecoder->DecodeFrame2WithCtx (pThrCtx->pCtx, NULL, 0, pThrCtx->ppDst, &pThrCtx->sDstInfo); // NULL/0 source: presumably the input was already attached to pCtx by the caller - TODO confirm
+
+ //WelsMutexUnlock (&pWelsDecoder->m_csDecoder);
+ return (DECODING_STATE)iRet;
+}
+
+DECLARE_PROCTHREAD (pThrProcFrame, p) { // worker loop: announce availability, wait for activation, then decode or exit depending on uiCommand
+ SWelsDecoderThreadCTX* pThrCtx = (SWelsDecoderThreadCTX*)p;
+ while (1) {
+ RELEASE_SEMAPHORE (pThrCtx->sThreadInfo.sIsBusy); // sIsBusy is a pointer (set to &m_sIsBusy in OpenDecoderThreads), hence no address-of
+ RELEASE_SEMAPHORE (&pThrCtx->sThreadInfo.sIsIdle); // CloseDecoderThreads waits on this per-thread semaphore
+ WAIT_SEMAPHORE (&pThrCtx->sThreadInfo.sIsActivated, WELS_DEC_THREAD_WAIT_INFINITE); // block until the owner activates this worker
+ if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_RUN) {
+ CWelsDecoder* pWelsDecoder = (CWelsDecoder*)pThrCtx->threadCtxOwner;
+ ConstructAccessUnit (pWelsDecoder, pThrCtx); // the returned decoding state is discarded here
+ } else if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_ABORT) {
+ break; // any other command value simply loops again
+ }
+ }
+ return 0;
+}
+
CWelsDecoder::CWelsDecoder (void)
- : m_pDecContext (NULL),
- m_pWelsTrace (NULL),
- m_iPictInfoIndex (0),
- m_iMinPOC (sIMinInt32),
- m_iNumOfPicts (0),
- m_iLastGOPRemainPicts (0),
- m_LastWrittenPOC (sIMinInt32),
- m_iLargestBufferedPicIndex (0) {
+ : m_pWelsTrace (NULL),
+ m_uiDecodeTimeStamp (0),
+ m_bIsBaseline (false),
+ m_iCpuCount (1),
+ m_iThreadCount (0),
+ m_iCtxCount (1),
+ m_pPicBuff (NULL),
+ m_bParamSetsLostFlag (false),
+ m_bFreezeOutput (false),
+ m_DecCtxActiveCount (0),
+ m_pDecThrCtx (NULL),
+ m_pLastDecThrCtx (NULL),
+ m_iLastBufferedIdx (0) {
#ifdef OUTPUT_BIT_STREAM
char chFileName[1024] = { 0 }; //for .264
int iBufUsed = 0;
@@ -120,11 +165,18 @@ CWelsDecoder::CWelsDecoder (void)
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::CWelsDecoder() entry");
}
- for (int32_t i = 0; i < 16; ++i) {
- m_sPictInfoList[i].bLastGOP = false;
- m_sPictInfoList[i].iPOC = sIMinInt32;
+ ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true);
+
+ m_iCpuCount = GetCPUCount();
+ if (m_iCpuCount > WELS_DEC_MAX_NUM_CPU) {
+ m_iCpuCount = WELS_DEC_MAX_NUM_CPU;
}
+ m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount];
+ memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount);
+ for (int32_t i = 0; i < WELS_DEC_MAX_NUM_CPU; ++i) {
+ m_pDecThrCtxActive[i] = NULL;
+ }
#ifdef OUTPUT_BIT_STREAM
SWelsTime sCurTime;
@@ -180,7 +232,7 @@ CWelsDecoder::~CWelsDecoder() {
if (m_pWelsTrace != NULL) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::~CWelsDecoder()");
}
-
+ CloseDecoderThreads();
UninitDecoder();
#ifdef OUTPUT_BIT_STREAM
@@ -198,6 +250,10 @@ CWelsDecoder::~CWelsDecoder() {
delete m_pWelsTrace;
m_pWelsTrace = NULL;
}
+ if (m_pDecThrCtx != NULL) {
+ delete[] m_pDecThrCtx;
+ m_pDecThrCtx = NULL;
+ }
}
long CWelsDecoder::Initialize (const SDecodingParam* pParam) {
@@ -226,26 +282,88 @@ long CWelsDecoder::Uninitialize() {
}
void CWelsDecoder::UninitDecoder (void) {
- if (NULL == m_pDecContext)
- return;
+ for (int32_t i = 0; i < m_iCtxCount; ++i) {
+ if (m_pDecThrCtx[i].pCtx != NULL) {
+ if (i > 0) {
+ WelsResetRefPicWithoutUnRef (m_pDecThrCtx[i].pCtx);
+ }
+ UninitDecoderCtx (m_pDecThrCtx[i].pCtx);
+ }
+ }
+}
+
+void CWelsDecoder::OpenDecoderThreads() {
+ if (m_iThreadCount >= 1) {
+ m_uiDecodeTimeStamp = 0;
+ CREATE_SEMAPHORE (&m_sIsBusy, m_iThreadCount, m_iThreadCount, NULL);
+ WelsMutexInit (&m_csDecoder);
+ CREATE_EVENT (&m_sBufferingEvent, 1, 0, NULL);
+ SET_EVENT (&m_sBufferingEvent);
+ CREATE_EVENT (&m_sReleaseBufferEvent, 1, 0, NULL);
+ SET_EVENT (&m_sReleaseBufferEvent);
+ for (int32_t i = 0; i < m_iThreadCount; ++i) {
+ m_pDecThrCtx[i].sThreadInfo.uiThrMaxNum = m_iThreadCount;
+ m_pDecThrCtx[i].sThreadInfo.uiThrNum = i;
+ m_pDecThrCtx[i].sThreadInfo.uiThrStackSize = WELS_DEC_MAX_THREAD_STACK_SIZE;
+ m_pDecThrCtx[i].sThreadInfo.pThrProcMain = pThrProcFrame;
+ m_pDecThrCtx[i].sThreadInfo.sIsBusy = &m_sIsBusy;
+ m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN;
+ m_pDecThrCtx[i].threadCtxOwner = this;
+ m_pDecThrCtx[i].kpSrc = NULL;
+ m_pDecThrCtx[i].kiSrcLen = 0;
+ m_pDecThrCtx[i].ppDst = NULL;
+ m_pDecThrCtx[i].pDec = NULL;
+ CREATE_EVENT (&m_pDecThrCtx[i].sImageReady, 1, 0, NULL);
+ CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart, 1, 0, NULL);
+ CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinish, 1, 0, NULL);
+ CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, 0, 1, NULL);
+ CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated, 0, 1, NULL);
+ CREATE_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle, pThrProcInit, (void*) (& (m_pDecThrCtx[i])));
+ }
+ }
+}
+void CWelsDecoder::CloseDecoderThreads() {
+ if (m_iThreadCount >= 1) {
+ for (int32_t i = 0; i < m_iThreadCount; i++) { //waiting the completion begun slices
+ WAIT_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+ m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_ABORT;
+ RELEASE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated);
+ WAIT_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle);
+ CLOSE_EVENT (&m_pDecThrCtx[i].sImageReady);
+ CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart);
+ CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinish);
+ CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle);
+ CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated);
+ }
+ WelsMutexDestroy (&m_csDecoder);
+ CLOSE_EVENT (&m_sBufferingEvent);
+ CLOSE_EVENT (&m_sReleaseBufferEvent);
+ CLOSE_SEMAPHORE (&m_sIsBusy);
+ }
+}
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoder(), openh264 codec version = %s.",
- VERSION_NUMBER);
+void CWelsDecoder::UninitDecoderCtx (PWelsDecoderContext& pCtx) {
+ if (pCtx != NULL) {
- WelsEndDecoder (m_pDecContext);
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoderCtx(), openh264 codec version = %s.",
+ VERSION_NUMBER);
- if (m_pDecContext->pMemAlign != NULL) {
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
- "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
- m_pDecContext->pMemAlign->WelsGetMemoryUsage());
- delete m_pDecContext->pMemAlign;
- m_pDecContext->pMemAlign = NULL;
- }
+ WelsEndDecoder (pCtx);
- if (NULL != m_pDecContext) {
- WelsFree (m_pDecContext, "m_pDecContext");
+ if (pCtx->pMemAlign != NULL) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+ "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
+ pCtx->pMemAlign->WelsGetMemoryUsage());
+ delete pCtx->pMemAlign;
+ pCtx->pMemAlign = NULL;
+ }
- m_pDecContext = NULL;
+ if (NULL != pCtx) {
+ WelsFree (pCtx, "m_pDecContext");
+
+ pCtx = NULL;
+ }
+ if (m_iCtxCount <= 1) m_pDecThrCtx[0].pCtx = NULL;
}
}
@@ -255,43 +373,97 @@ int32_t CWelsDecoder::InitDecoder (const SDecodingParam* pParam) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
"CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
VERSION_NUMBER, (int32_t)pParam->bParseOnly);
+ if (m_iThreadCount >= 1 && pParam->bParseOnly) {
+ m_iThreadCount = 0;
+ }
+ OpenDecoderThreads();
+ //reset decoder context
+ memset (&m_sDecoderStatistics, 0, sizeof (SDecoderStatistics));
+ memset (&m_sLastDecPicInfo, 0, sizeof (SWelsLastDecPicInfo));
+ memset (&m_sVlcTable, 0, sizeof (SVlcTable));
+ UninitDecoder();
+ WelsDecoderLastDecPicInfoDefaults (m_sLastDecPicInfo);
+ for (int32_t i = 0; i < m_iCtxCount; ++i) {
+ InitDecoderCtx (m_pDecThrCtx[i].pCtx, pParam);
+ if (m_iThreadCount >= 1) {
+ m_pDecThrCtx[i].pCtx->pThreadCtx = &m_pDecThrCtx[i];
+ }
+ }
+ m_bParamSetsLostFlag = false;
+ m_bFreezeOutput = false;
+ return cmResultSuccess;
+}
+
+// the return value of this function is not suitable, it need report failure info to upper layer.
+int32_t CWelsDecoder::InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam) {
+
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+ "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
+ VERSION_NUMBER, (int32_t)pParam->bParseOnly);
//reset decoder context
- if (m_pDecContext) //free
- UninitDecoder();
- m_pDecContext = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
- if (NULL == m_pDecContext)
+ UninitDecoderCtx (pCtx);
+ pCtx = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
+ if (NULL == pCtx)
return cmMallocMemeError;
int32_t iCacheLineSize = 16; // on chip cache line size in byte
- m_pDecContext->pMemAlign = new CMemoryAlign (iCacheLineSize);
- WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pMemAlign), UninitDecoder())
-
+ pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize);
+ WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pMemAlign), UninitDecoderCtx (pCtx))
+ if (m_iCtxCount <= 1) m_pDecThrCtx[0].pCtx = pCtx;
//fill in default value into context
- WelsDecoderDefaults (m_pDecContext, &m_pWelsTrace->m_sLogCtx);
-
+ pCtx->pLastDecPicInfo = &m_sLastDecPicInfo;
+ pCtx->pDecoderStatistics = &m_sDecoderStatistics;
+ pCtx->pVlcTable = &m_sVlcTable;
+ pCtx->pPictInfoList = m_sPictInfoList;
+ pCtx->pPictReoderingStatus = &m_sReoderingStatus;
+ pCtx->pCsDecoder = &m_csDecoder;
+ WelsDecoderDefaults (pCtx, &m_pWelsTrace->m_sLogCtx);
+ WelsDecoderSpsPpsDefaults (pCtx->sSpsPpsCtx);
//check param and update decoder context
- m_pDecContext->pParam = (SDecodingParam*)m_pDecContext->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
- "SDecodingParam");
- WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pParam), UninitDecoder());
- int32_t iRet = DecoderConfigParam (m_pDecContext, pParam);
+ pCtx->pParam = (SDecodingParam*)pCtx->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
+ "SDecodingParam");
+ WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pParam), UninitDecoderCtx (pCtx));
+ int32_t iRet = DecoderConfigParam (pCtx, pParam);
WELS_VERIFY_RETURN_IFNEQ (iRet, cmResultSuccess);
//init decoder
- WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (m_pDecContext, &m_pWelsTrace->m_sLogCtx),
- UninitDecoder())
-
+ WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (pCtx, &m_pWelsTrace->m_sLogCtx),
+ UninitDecoderCtx (pCtx))
+ pCtx->pPicBuff = NULL;
return cmResultSuccess;
}
-int32_t CWelsDecoder::ResetDecoder() {
+int32_t CWelsDecoder::ResetDecoder (PWelsDecoderContext& pCtx) {
// TBC: need to be modified when context and trace point are null
- if (m_pDecContext != NULL && m_pWelsTrace != NULL) {
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
- m_pDecContext->iErrorCode);
- SDecodingParam sPrevParam;
- memcpy (&sPrevParam, m_pDecContext->pParam, sizeof (SDecodingParam));
+ if (m_iThreadCount >= 1) {
+ ThreadResetDecoder (pCtx);
+ } else {
+ if (pCtx != NULL && m_pWelsTrace != NULL) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
+ pCtx->iErrorCode);
+ SDecodingParam sPrevParam;
+ memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam));
+
+ WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoderCtx (pCtx, &sPrevParam),
+ UninitDecoderCtx (pCtx));
+ } else if (m_pWelsTrace != NULL) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
+ }
+ ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, false);
+ }
+ return ERR_INFO_UNINIT;
+}
- WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoder (&sPrevParam), UninitDecoder());
+int32_t CWelsDecoder::ThreadResetDecoder (PWelsDecoderContext& pCtx) {
+ // TBC: need to be modified when context and trace point are null
+ SDecodingParam sPrevParam;
+ if (pCtx != NULL && m_pWelsTrace != NULL) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d", pCtx->iErrorCode);
+ memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam));
+ ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true);
+ CloseDecoderThreads();
+ UninitDecoder();
+ InitDecoder (&sPrevParam);
} else if (m_pWelsTrace != NULL) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
}
@@ -303,71 +475,100 @@ int32_t CWelsDecoder::ResetDecoder() {
*/
long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) {
int iVal = 0;
+ if (eOptID == DECODER_OPTION_NUM_OF_THREADS) {
+ if (pOption != NULL) {
+ int32_t threadCount = * ((int32_t*)pOption);
+ if (threadCount < 0) threadCount = 0;
+ if (threadCount > m_iCpuCount) {
+ threadCount = m_iCpuCount;
+ }
+ if (threadCount > 3) {
+ threadCount = 3;
+ }
+ if (threadCount != m_iThreadCount) {
+ m_iThreadCount = threadCount;
+ if (m_pDecThrCtx != NULL) {
+ delete [] m_pDecThrCtx;
+ m_iCtxCount = m_iThreadCount == 0 ? 1 : m_iThreadCount;
+ m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount];
+ memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount);
+ }
+ }
+ }
+ return cmResultSuccess;
+ }
+ for (int32_t i = 0; i < m_iCtxCount; ++i) {
+ PWelsDecoderContext pDecContext = m_pDecThrCtx[i].pCtx;
+ if (pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
+ eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
+ return dsInitialOptExpected;
+ if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
+ if (pOption == NULL)
+ return cmInitParaError;
- if (m_pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
- eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
- return dsInitialOptExpected;
- if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
- if (pOption == NULL)
- return cmInitParaError;
+ iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
- iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
+ if (pDecContext == NULL) return dsInitialOptExpected;
- m_pDecContext->bEndOfStreamFlag = iVal ? true : false;
+ pDecContext->bEndOfStreamFlag = iVal ? true : false;
- return cmResultSuccess;
- } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
- if (pOption == NULL)
- return cmInitParaError;
+ return cmResultSuccess;
+ } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
+ if (pOption == NULL)
+ return cmInitParaError;
+
+ if (pDecContext == NULL) return dsInitialOptExpected;
+
+ iVal = * ((int*)pOption); // int value for error concealment idc
+ iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
+ if ((pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+ "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
+ return cmInitParaError;
+ }
- iVal = * ((int*)pOption); // int value for error concealment idc
- iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
- if ((m_pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
+ pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal;
+ InitErrorCon (pDecContext);
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
- "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
- return cmInitParaError;
- }
-
- m_pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal;
- InitErrorCon (m_pDecContext);
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
- "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
+ "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
- return cmResultSuccess;
- } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
- if (m_pWelsTrace) {
- uint32_t level = * ((uint32_t*)pOption);
- m_pWelsTrace->SetTraceLevel (level);
- }
- return cmResultSuccess;
- } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
- if (m_pWelsTrace) {
- WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
- m_pWelsTrace->SetTraceCallback (callback);
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
- "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
- callback);
- }
- return cmResultSuccess;
- } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
- if (m_pWelsTrace) {
- void* ctx = * ((void**)pOption);
- m_pWelsTrace->SetTraceCallbackContext (ctx);
- }
- return cmResultSuccess;
- } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
- "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
- return cmInitParaError;
- } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
- if (pOption) {
- m_pDecContext->sDecoderStatistics.iStatisticsLogInterval = (* ((unsigned int*)pOption));
return cmResultSuccess;
+ } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
+ if (m_pWelsTrace) {
+ uint32_t level = * ((uint32_t*)pOption);
+ m_pWelsTrace->SetTraceLevel (level);
+ }
+ return cmResultSuccess;
+ } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
+ if (m_pWelsTrace) {
+ WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
+ m_pWelsTrace->SetTraceCallback (callback);
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+ "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
+ callback);
+ }
+ return cmResultSuccess;
+ } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
+ if (m_pWelsTrace) {
+ void* ctx = * ((void**)pOption);
+ m_pWelsTrace->SetTraceCallbackContext (ctx);
+ }
+ return cmResultSuccess;
+ } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+ "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
+ return cmInitParaError;
+ } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
+ if (pOption) {
+ if (pDecContext == NULL) return dsInitialOptExpected;
+ pDecContext->pDecoderStatistics->iStatisticsLogInterval = (* ((unsigned int*)pOption));
+ return cmResultSuccess;
+ }
+ } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+ "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
+ return cmInitParaError;
}
- } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
- "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
- return cmInitParaError;
}
return cmInitParaError;
}
@@ -377,105 +578,109 @@ long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) {
*/
long CWelsDecoder::GetOption (DECODER_OPTION eOptID, void* pOption) {
int iVal = 0;
-
- if (m_pDecContext == NULL)
+ if (DECODER_OPTION_NUM_OF_THREADS == eOptID) {
+ * ((int*)pOption) = m_iThreadCount;
+ return cmResultSuccess;
+ }
+ PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+ if (pDecContext == NULL)
return cmInitExpected;
if (pOption == NULL)
return cmInitParaError;
if (DECODER_OPTION_END_OF_STREAM == eOptID) {
- iVal = m_pDecContext->bEndOfStreamFlag;
+ iVal = pDecContext->bEndOfStreamFlag;
* ((int*)pOption) = iVal;
return cmResultSuccess;
}
#ifdef LONG_TERM_REF
else if (DECODER_OPTION_IDR_PIC_ID == eOptID) {
- iVal = m_pDecContext->uiCurIdrPicId;
+ iVal = pDecContext->uiCurIdrPicId;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_FRAME_NUM == eOptID) {
- iVal = m_pDecContext->iFrameNum;
+ iVal = pDecContext->iFrameNum;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_LTR_MARKING_FLAG == eOptID) {
- iVal = m_pDecContext->bCurAuContainLtrMarkSeFlag;
+ iVal = pDecContext->bCurAuContainLtrMarkSeFlag;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_LTR_MARKED_FRAME_NUM == eOptID) {
- iVal = m_pDecContext->iFrameNumOfAuMarkedLtr;
+ iVal = pDecContext->iFrameNumOfAuMarkedLtr;
* ((int*)pOption) = iVal;
return cmResultSuccess;
}
#endif
else if (DECODER_OPTION_VCL_NAL == eOptID) { //feedback whether or not have VCL NAL in current AU
- iVal = m_pDecContext->iFeedbackVclNalInAu;
+ iVal = pDecContext->iFeedbackVclNalInAu;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_TEMPORAL_ID == eOptID) { //if have VCL NAL in current AU, then feedback the temporal ID
- iVal = m_pDecContext->iFeedbackTidInAu;
+ iVal = pDecContext->iFeedbackTidInAu;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_IS_REF_PIC == eOptID) {
- iVal = m_pDecContext->iFeedbackNalRefIdc;
+ iVal = pDecContext->iFeedbackNalRefIdc;
if (iVal > 0)
iVal = 1;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_ERROR_CON_IDC == eOptID) {
- iVal = (int)m_pDecContext->pParam->eEcActiveIdc;
+ iVal = (int)pDecContext->pParam->eEcActiveIdc;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_GET_STATISTICS == eOptID) { // get decoder statistics info for real time debugging
SDecoderStatistics* pDecoderStatistics = (static_cast<SDecoderStatistics*> (pOption));
- memcpy (pDecoderStatistics, &m_pDecContext->sDecoderStatistics, sizeof (SDecoderStatistics));
+ memcpy (pDecoderStatistics, pDecContext->pDecoderStatistics, sizeof (SDecoderStatistics));
- if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount != 0) { //not original status
- pDecoderStatistics->fAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
- (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount);
- pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
- (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount + m_pDecContext->sDecoderStatistics.uiFreezingIDRNum +
- m_pDecContext->sDecoderStatistics.uiFreezingNonIDRNum);
+ if (pDecContext->pDecoderStatistics->uiDecodedFrameCount != 0) { //not original status
+ pDecoderStatistics->fAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) /
+ (pDecContext->pDecoderStatistics->uiDecodedFrameCount);
+ pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) /
+ (pDecContext->pDecoderStatistics->uiDecodedFrameCount + pDecContext->pDecoderStatistics->uiFreezingIDRNum +
+ pDecContext->pDecoderStatistics->uiFreezingNonIDRNum);
}
return cmResultSuccess;
} else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
if (pOption) {
- iVal = m_pDecContext->sDecoderStatistics.iStatisticsLogInterval;
+ iVal = pDecContext->pDecoderStatistics->iStatisticsLogInterval;
* ((unsigned int*)pOption) = iVal;
return cmResultSuccess;
}
} else if (DECODER_OPTION_GET_SAR_INFO == eOptID) { //get decoder SAR info in VUI
PVuiSarInfo pVuiSarInfo = (static_cast<PVuiSarInfo> (pOption));
memset (pVuiSarInfo, 0, sizeof (SVuiSarInfo));
- if (!m_pDecContext->pSps) {
+ if (!pDecContext->pSps) {
return cmInitExpected;
} else {
- pVuiSarInfo->uiSarWidth = m_pDecContext->pSps->sVui.uiSarWidth;
- pVuiSarInfo->uiSarHeight = m_pDecContext->pSps->sVui.uiSarHeight;
- pVuiSarInfo->bOverscanAppropriateFlag = m_pDecContext->pSps->sVui.bOverscanAppropriateFlag;
+ pVuiSarInfo->uiSarWidth = pDecContext->pSps->sVui.uiSarWidth;
+ pVuiSarInfo->uiSarHeight = pDecContext->pSps->sVui.uiSarHeight;
+ pVuiSarInfo->bOverscanAppropriateFlag = pDecContext->pSps->sVui.bOverscanAppropriateFlag;
return cmResultSuccess;
}
} else if (DECODER_OPTION_PROFILE == eOptID) {
- if (!m_pDecContext->pSps) {
+ if (!pDecContext->pSps) {
return cmInitExpected;
}
- iVal = (int)m_pDecContext->pSps->uiProfileIdc;
+ iVal = (int)pDecContext->pSps->uiProfileIdc;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_LEVEL == eOptID) {
- if (!m_pDecContext->pSps) {
+ if (!pDecContext->pSps) {
return cmInitExpected;
}
- iVal = (int)m_pDecContext->pSps->uiLevelIdc;
+ iVal = (int)pDecContext->pSps->uiLevelIdc;
* ((int*)pOption) = iVal;
return cmResultSuccess;
} else if (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER == eOptID) {
- if (m_pDecContext->pSps && m_pDecContext->pSps->uiProfileIdc != 66 && m_pDecContext->pPps->bEntropyCodingModeFlag) {
- * ((int*)pOption) = m_iNumOfPicts > 0 ? m_iNumOfPicts : 0;
- } else {
- * ((int*)pOption) = 0;
+ for (int32_t activeThread = 0; activeThread < m_DecCtxActiveCount; ++activeThread) {
+ WAIT_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+ RELEASE_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle);
}
+ * ((int*)pOption) = m_sReoderingStatus.iNumOfPicts;
return cmResultSuccess;
}
@@ -486,7 +691,17 @@ DECODING_STATE CWelsDecoder::DecodeFrameNoDelay (const unsigned char* kpSrc,
const int kiSrcLen,
unsigned char** ppDst,
SBufferInfo* pDstInfo) {
- int iRet;
+ int iRet = dsErrorFree;
+ if (m_iThreadCount >= 1) {
+ iRet = ThreadDecodeFrameInternal (kpSrc, kiSrcLen, ppDst, pDstInfo);
+ if (m_sReoderingStatus.iNumOfPicts) {
+ WAIT_EVENT (&m_sBufferingEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+ RESET_EVENT (&m_sReleaseBufferEvent);
+ ReleaseBufferedReadyPicture (NULL, ppDst, pDstInfo);
+ SET_EVENT (&m_sReleaseBufferEvent);
+ }
+ return (DECODING_STATE)iRet;
+ }
//SBufferInfo sTmpBufferInfo;
//unsigned char* ppTmpDst[3] = {NULL, NULL, NULL};
iRet = (int)DecodeFrame2 (kpSrc, kiSrcLen, ppDst, pDstInfo);
@@ -504,24 +719,24 @@ DECODING_STATE CWelsDecoder::DecodeFrameNoDelay (const unsigned char* kpSrc,
return (DECODING_STATE)iRet;
}
-DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
+DECODING_STATE CWelsDecoder::DecodeFrame2WithCtx (PWelsDecoderContext pDecContext, const unsigned char* kpSrc,
const int kiSrcLen,
unsigned char** ppDst,
SBufferInfo* pDstInfo) {
- if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+ if (pDecContext == NULL || pDecContext->pParam == NULL) {
if (m_pWelsTrace != NULL) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeFrame2 without Initialize.\n");
}
return dsInitialOptExpected;
}
- if (m_pDecContext->pParam->bParseOnly) {
+ if (pDecContext->pParam->bParseOnly) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be false for this API calling! \n");
- m_pDecContext->iErrorCode |= dsInvalidArgument;
+ pDecContext->iErrorCode |= dsInvalidArgument;
return dsInvalidArgument;
}
- if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
- if (ResetDecoder())
+ if (CheckBsBuffer (pDecContext, kiSrcLen)) {
+ if (ResetDecoder (pDecContext))
return dsOutOfMemory;
return dsErrorFree;
@@ -537,163 +752,243 @@ DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
WelsFflush (m_pFBSSize);
}
#endif//OUTPUT_BIT_STREAM
- m_pDecContext->bEndOfStreamFlag = false;
+ pDecContext->bEndOfStreamFlag = false;
+ if (GetThreadCount (pDecContext) <= 0) {
+ pDecContext->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp;
+ }
} else {
//For application MODE, the error detection should be added for safe.
//But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
- m_pDecContext->bEndOfStreamFlag = true;
- m_pDecContext->bInstantDecFlag = true;
+ pDecContext->bEndOfStreamFlag = true;
+ pDecContext->bInstantDecFlag = true;
}
int64_t iStart, iEnd;
iStart = WelsTime();
- ppDst[0] = ppDst[1] = ppDst[2] = NULL;
- m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
- m_pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
+ if (GetThreadCount (pDecContext) <= 1) {
+ ppDst[0] = ppDst[1] = ppDst[2] = NULL;
+ }
+ pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+ pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp;
- memset (pDstInfo, 0, sizeof (SBufferInfo));
+ if (GetThreadCount (pDecContext) <= 1) {
+ memset (pDstInfo, 0, sizeof (SBufferInfo));
+ }
pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp;
#ifdef LONG_TERM_REF
- m_pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR
- m_pDecContext->bCurAuContainLtrMarkSeFlag = false;
- m_pDecContext->iFrameNumOfAuMarkedLtr = 0;
- m_pDecContext->iFrameNum = -1; //initialize
+ pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR
+ pDecContext->bCurAuContainLtrMarkSeFlag = false;
+ pDecContext->iFrameNumOfAuMarkedLtr = 0;
+ pDecContext->iFrameNum = -1; //initialize
#endif
- m_pDecContext->iFeedbackTidInAu = -1; //initialize
- m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
+ pDecContext->iFeedbackTidInAu = -1; //initialize
+ pDecContext->iFeedbackNalRefIdc = -1; //initialize
if (pDstInfo) {
pDstInfo->uiOutYuvTimeStamp = 0;
- m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+ pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
} else {
- m_pDecContext->uiTimeStamp = 0;
+ pDecContext->uiTimeStamp = 0;
}
- WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, ppDst,
+ WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, ppDst,
pDstInfo, NULL); //iErrorCode has been modified in this function
- m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
- if (m_pDecContext->iErrorCode) {
+ pDecContext->bInstantDecFlag = false; //reset no-delay flag
+ if (pDecContext->iErrorCode) {
EWelsNalUnitType eNalType =
NAL_UNIT_UNSPEC_0; //for NBR, IDR frames are expected to decode as followed if error decoding an IDR currently
- eNalType = m_pDecContext->sCurNalHead.eNalUnitType;
-
- if (m_pDecContext->iErrorCode & dsOutOfMemory) {
- if (ResetDecoder())
+ eNalType = pDecContext->sCurNalHead.eNalUnitType;
+ if (pDecContext->iErrorCode & dsOutOfMemory) {
+ if (ResetDecoder (pDecContext)) {
return dsOutOfMemory;
-
+ }
+ return dsErrorFree;
+ }
+ if (pDecContext->iErrorCode & dsRefListNullPtrs) {
+ if (ResetDecoder (pDecContext)) {
+ return dsRefListNullPtrs;
+ }
+ return dsErrorFree;
+ }
+ if ((pDecContext->iErrorCode & (dsBitstreamError | dsDataErrorConcealed)) && pDecContext->eSliceType == B_SLICE) {
+ if (ResetDecoder (pDecContext)) {
+ pDstInfo->iBufferStatus = 0;
+ return (DECODING_STATE)pDecContext->iErrorCode;
+ }
return dsErrorFree;
}
//for AVC bitstream (excluding AVC with temporal scalability, including TP), as long as error occur, SHOULD notify upper layer key frame loss.
if ((IS_PARAM_SETS_NALS (eNalType) || NAL_UNIT_CODED_SLICE_IDR == eNalType) ||
- (VIDEO_BITSTREAM_AVC == m_pDecContext->eVideoType)) {
- if (m_pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+ (VIDEO_BITSTREAM_AVC == pDecContext->eVideoType)) {
+ if (pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
#ifdef LONG_TERM_REF
- m_pDecContext->bParamSetsLostFlag = true;
+ pDecContext->bParamSetsLostFlag = true;
#else
- m_pDecContext->bReferenceLostAtT0Flag = true;
+ pDecContext->bReferenceLostAtT0Flag = true;
#endif
}
}
- if (m_pDecContext->bPrintFrameErrorTraceFlag) {
+ if (pDecContext->bPrintFrameErrorTraceFlag) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n",
- m_pDecContext->iErrorCode);
- m_pDecContext->bPrintFrameErrorTraceFlag = false;
+ pDecContext->iErrorCode);
+ pDecContext->bPrintFrameErrorTraceFlag = false;
} else {
- m_pDecContext->iIgnoredErrorInfoPacketCount++;
- if (m_pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
+ pDecContext->iIgnoredErrorInfoPacketCount++;
+ if (pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, "continuous error reached INT_MAX! Restart as 0.");
- m_pDecContext->iIgnoredErrorInfoPacketCount = 0;
+ pDecContext->iIgnoredErrorInfoPacketCount = 0;
}
}
- if ((m_pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
+ if ((pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
//TODO after dec status updated
- m_pDecContext->iErrorCode |= dsDataErrorConcealed;
+ pDecContext->iErrorCode |= dsDataErrorConcealed;
- m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
- if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
- ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
- m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+ pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+ if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+ ResetDecStatNums (pDecContext->pDecoderStatistics);
+ pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
}
- int32_t iMbConcealedNum = m_pDecContext->iMbEcedNum + m_pDecContext->iMbEcedPropNum;
- m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->iMbNum == 0 ?
- (m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : ((
- m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + ((
- iMbConcealedNum * 100) / m_pDecContext->iMbNum));
- m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->iMbNum == 0 ?
- (m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : ((
- m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + ((
- m_pDecContext->iMbEcedPropNum * 100) / m_pDecContext->iMbNum));
- m_pDecContext->sDecoderStatistics.uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
- m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 0 :
- m_pDecContext->sDecoderStatistics.uiAvgEcRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum;
- m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 0 :
- m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum;
+ int32_t iMbConcealedNum = pDecContext->iMbEcedNum + pDecContext->iMbEcedPropNum;
+ pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->iMbNum == 0 ?
+ (pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
+ pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
+ iMbConcealedNum * 100) / pDecContext->iMbNum));
+ pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->iMbNum == 0 ?
+ (pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
+ pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
+ pDecContext->iMbEcedPropNum * 100) / pDecContext->iMbNum));
+ pDecContext->pDecoderStatistics->uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
+ pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
+ pDecContext->pDecoderStatistics->uiAvgEcRatio / pDecContext->pDecoderStatistics->uiEcFrameNum;
+ pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
+ pDecContext->pDecoderStatistics->uiAvgEcPropRatio / pDecContext->pDecoderStatistics->uiEcFrameNum;
}
iEnd = WelsTime();
- m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+ pDecContext->dDecTime += (iEnd - iStart) / 1e3;
- OutputStatisticsLog (m_pDecContext->sDecoderStatistics);
+ OutputStatisticsLog (*pDecContext->pDecoderStatistics);
-#ifdef _PICTURE_REORDERING_
- ReorderPicturesInDisplay (ppDst, pDstInfo);
-#endif
+ if (GetThreadCount (pDecContext) >= 1) {
+ WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+ RESET_EVENT (&m_sBufferingEvent);
+ BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+ SET_EVENT (&m_sBufferingEvent);
+ } else {
+ ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo);
+ }
- return (DECODING_STATE)m_pDecContext->iErrorCode;
+ return (DECODING_STATE)pDecContext->iErrorCode;
}
// else Error free, the current codec works well
if (pDstInfo->iBufferStatus == 1) {
- m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
- if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
- ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
- m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+ pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+ if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+ ResetDecStatNums (pDecContext->pDecoderStatistics);
+ pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
}
- OutputStatisticsLog (m_pDecContext->sDecoderStatistics);
+ OutputStatisticsLog (*pDecContext->pDecoderStatistics);
}
iEnd = WelsTime();
- m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+ pDecContext->dDecTime += (iEnd - iStart) / 1e3;
-#ifdef _PICTURE_REORDERING_
- ReorderPicturesInDisplay (ppDst, pDstInfo);
-#endif
+ if (GetThreadCount (pDecContext) >= 1) {
+ WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+ RESET_EVENT (&m_sBufferingEvent);
+ BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+ SET_EVENT (&m_sBufferingEvent);
+ } else {
+ ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo);
+ }
return dsErrorFree;
}
+DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
+ const int kiSrcLen,
+ unsigned char** ppDst,
+ SBufferInfo* pDstInfo) {
+ PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+ return DecodeFrame2WithCtx (pDecContext, kpSrc, kiSrcLen, ppDst, pDstInfo);
+}
+
DECODING_STATE CWelsDecoder::FlushFrame (unsigned char** ppDst,
SBufferInfo* pDstInfo) {
- if (m_pDecContext->bEndOfStreamFlag && m_iNumOfPicts > 0) {
- m_iMinPOC = sIMinInt32;
- for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
- if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32) {
- m_iMinPOC = m_sPictInfoList[i].iPOC;
- m_iPictInfoIndex = i;
+ bool bEndOfStreamFlag = true;
+ if (m_iThreadCount <= 1) {
+ for (int32_t j = 0; j < m_iCtxCount; ++j) {
+ if (!m_pDecThrCtx[j].pCtx->bEndOfStreamFlag) {
+ bEndOfStreamFlag = false;
+ }
+ }
+ }
+ if (bEndOfStreamFlag && m_sReoderingStatus.iNumOfPicts > 0) {
+ m_sReoderingStatus.iMinPOC = IMinInt32;
+ if (m_bIsBaseline) {
+ uint32_t uiDecodingTimeStamp = 0;
+ int32_t firstValidIdx = -1;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sPictInfoList[i].iPOC > IMinInt32) {
+ uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ firstValidIdx = i;
+ break;
+ }
+ }
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (i == firstValidIdx) continue;
+ if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].uiDecodingTimeStamp < uiDecodingTimeStamp) {
+ uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ }
+ }
+ } else {
+ int32_t firstValidIdx = -1;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ firstValidIdx = i;
+ break;
+ }
}
- if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC) {
- m_iMinPOC = m_sPictInfoList[i].iPOC;
- m_iPictInfoIndex = i;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (i == firstValidIdx) continue;
+ if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) {
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ }
}
}
}
- if (m_iMinPOC > sIMinInt32) {
- m_LastWrittenPOC = m_iMinPOC;
+ if (m_sReoderingStatus.iMinPOC > IMinInt32) {
+ m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
#if defined (_DEBUG)
#ifdef _MOTION_VECTOR_DUMP_
- fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC);
+ fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC,
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp);
#endif
#endif
- memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
- ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0];
- ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1];
- ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2];
- m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32;
- m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false;
- m_iMinPOC = sIMinInt32;
- --m_iNumOfPicts;
+ memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+ ppDst[0] = pDstInfo->pDst[0];
+ ppDst[1] = pDstInfo->pDst[1];
+ ppDst[2] = pDstInfo->pDst[2];
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+ PPicBuff pPicBuff = m_iThreadCount <= 1 ? m_pDecThrCtx[0].pCtx->pPicBuff : m_pPicBuff;
+ if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < pPicBuff->iCapacity) {
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
+ }
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
+ m_sReoderingStatus.iMinPOC = IMinInt32;
+ --m_sReoderingStatus.iNumOfPicts;
}
+
return dsErrorFree;
}
@@ -742,124 +1037,223 @@ void CWelsDecoder::OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics)
}
}
-DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo) {
- if (pDstInfo->iBufferStatus == 1 && m_pDecContext->pSps->uiProfileIdc != 66
- && m_pDecContext->pPps->bEntropyCodingModeFlag) {
- if (m_pDecContext->pSliceHeader->iPicOrderCntLsb == 0) {
- if (m_iNumOfPicts > 0) {
- m_iLastGOPRemainPicts = m_iNumOfPicts;
- for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
- if (m_sPictInfoList[i].iPOC > sIMinInt32) {
- m_sPictInfoList[i].bLastGOP = true;
+void CWelsDecoder::BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst,
+ SBufferInfo* pDstInfo) {
+ if (pDstInfo->iBufferStatus == 0) {
+ return;
+ }
+ m_bIsBaseline = pCtx->pSps->uiProfileIdc == 66 || pCtx->pSps->uiProfileIdc == 83;
+ if (!m_bIsBaseline) {
+ if (m_sReoderingStatus.iNumOfPicts && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb
+ && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) {
+ m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts;
+
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sPictInfoList[i].iPOC > IMinInt32) {
+ m_sPictInfoList[i].bLastGOP = true;
+ }
+ }
+ } else {
+ if (m_sReoderingStatus.iNumOfPicts > 0) {
+ //This can happen when decoder moves to next GOP without being able to decode first picture PicOrderCntLsb = 0
+ bool hasGOPChanged = false;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sPictInfoList[i].iPOC == pCtx->pSliceHeader->iPicOrderCntLsb) {
+ hasGOPChanged = true;
+ break;
+ }
+ }
+ if (hasGOPChanged) {
+ m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sPictInfoList[i].iPOC > IMinInt32) {
+ m_sPictInfoList[i].bLastGOP = true;
+ }
}
}
}
}
- for (int32_t i = 0; i < 16; ++i) {
- if (m_sPictInfoList[i].iPOC == sIMinInt32) {
- memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo));
- m_sPictInfoList[i].pData[0] = ppDst[0];
- m_sPictInfoList[i].pData[1] = ppDst[1];
- m_sPictInfoList[i].pData[2] = ppDst[2];
- m_sPictInfoList[i].iPOC = m_pDecContext->pSliceHeader->iPicOrderCntLsb;
- m_sPictInfoList[i].iFrameNum = m_pDecContext->pSliceHeader->iFrameNum;
- m_sPictInfoList[i].bLastGOP = false;
- pDstInfo->iBufferStatus = 0;
- ++m_iNumOfPicts;
- if (i > m_iLargestBufferedPicIndex) {
- m_iLargestBufferedPicIndex = i;
- }
+ }
+ for (int32_t i = 0; i < 16; ++i) {
+ if (m_sPictInfoList[i].iPOC == IMinInt32) {
+ memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo));
+ m_sPictInfoList[i].iPOC = pCtx->pSliceHeader->iPicOrderCntLsb;
+ m_sPictInfoList[i].uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
+ m_sPictInfoList[i].iPicBuffIdx = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx;
+ if (GetThreadCount (pCtx) <= 1) ++pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iRefCount;
+ m_sPictInfoList[i].bLastGOP = false;
+ m_iLastBufferedIdx = i;
+ pDstInfo->iBufferStatus = 0;
+ ++m_sReoderingStatus.iNumOfPicts;
+ if (i > m_sReoderingStatus.iLargestBufferedPicIndex) {
+ m_sReoderingStatus.iLargestBufferedPicIndex = i;
+ }
+ break;
+ }
+ }
+}
+
+void CWelsDecoder::ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst,
+ SBufferInfo* pDstInfo) {
+ PPicBuff pPicBuff = pCtx ? pCtx->pPicBuff : m_pPicBuff;
+ if (pCtx == NULL && m_iThreadCount <= 1) {
+ pCtx = m_pDecThrCtx[0].pCtx;
+ }
+ if (!m_bIsBaseline && m_sReoderingStatus.iLastGOPRemainPicts > 0) {
+ m_sReoderingStatus.iMinPOC = IMinInt32;
+ int32_t firstValidIdx = -1;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].bLastGOP) {
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ firstValidIdx = i;
break;
}
}
- if (m_iLastGOPRemainPicts > 0) {
- m_iMinPOC = sIMinInt32;
- for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
- if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].bLastGOP) {
- m_iMinPOC = m_sPictInfoList[i].iPOC;
- m_iPictInfoIndex = i;
- }
- if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC && m_sPictInfoList[i].bLastGOP) {
- m_iMinPOC = m_sPictInfoList[i].iPOC;
- m_iPictInfoIndex = i;
- }
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (i == firstValidIdx) continue;
+ if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC
+ && m_sPictInfoList[i].bLastGOP) {
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
}
- m_LastWrittenPOC = m_iMinPOC;
+ }
+ m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
#if defined (_DEBUG)
#ifdef _MOTION_VECTOR_DUMP_
- fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC);
+ fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC,
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp);
#endif
#endif
- memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
- ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0];
- ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1];
- ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2];
- m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32;
- m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false;
- m_iMinPOC = sIMinInt32;
- --m_iNumOfPicts;
- --m_iLastGOPRemainPicts;
- if (m_iLastGOPRemainPicts == 0) {
- m_LastWrittenPOC = sIMinInt32;
+ memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+ ppDst[0] = pDstInfo->pDst[0];
+ ppDst[1] = pDstInfo->pDst[1];
+ ppDst[2] = pDstInfo->pDst[2];
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
+ m_sReoderingStatus.iMinPOC = IMinInt32;
+ --m_sReoderingStatus.iNumOfPicts;
+ --m_sReoderingStatus.iLastGOPRemainPicts;
+ if (m_sReoderingStatus.iLastGOPRemainPicts == 0) {
+ m_sReoderingStatus.iLastWrittenPOC = IMinInt32;
+ }
+ return;
+ }
+ if (m_sReoderingStatus.iNumOfPicts && m_bIsBaseline) {
+ uint32_t uiDecodingTimeStamp = 0;
+ int32_t firstValidIdx = -1;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sPictInfoList[i].iPOC > IMinInt32) {
+ uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ firstValidIdx = i;
+ break;
}
- return dsErrorFree;
}
- if (m_iNumOfPicts > 0) {
- m_iMinPOC = sIMinInt32;
- for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
- if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32) {
- m_iMinPOC = m_sPictInfoList[i].iPOC;
- m_iPictInfoIndex = i;
- }
- if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC) {
- m_iMinPOC = m_sPictInfoList[i].iPOC;
- m_iPictInfoIndex = i;
- }
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (i == firstValidIdx) continue;
+ if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].uiDecodingTimeStamp < uiDecodingTimeStamp) {
+ uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+ m_sReoderingStatus.iPictInfoIndex = i;
}
}
- if (m_iMinPOC > sIMinInt32) {
- if ((m_LastWrittenPOC > sIMinInt32 && m_iMinPOC - m_LastWrittenPOC <= 1)
- || m_iMinPOC < m_pDecContext->pSliceHeader->iPicOrderCntLsb) {
- m_LastWrittenPOC = m_iMinPOC;
+ if (uiDecodingTimeStamp > 0) {
#if defined (_DEBUG)
#ifdef _MOTION_VECTOR_DUMP_
- fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC);
+ fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC,
+ uiDecodingTimeStamp);
#endif
#endif
- memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
- ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0];
- ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1];
- ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2];
- m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32;
- m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false;
- m_iMinPOC = sIMinInt32;
- --m_iNumOfPicts;
- return dsErrorFree;
+ memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+ ppDst[0] = pDstInfo->pDst[0];
+ ppDst[1] = pDstInfo->pDst[1];
+ ppDst[2] = pDstInfo->pDst[2];
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
+ --m_sReoderingStatus.iNumOfPicts;
+ }
+ return;
+ }
+ if (m_sReoderingStatus.iNumOfPicts > 0) {
+ m_sReoderingStatus.iMinPOC = IMinInt32;
+ int32_t firstValidIdx = -1;
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ firstValidIdx = i;
+ break;
}
}
+ for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+ if (i == firstValidIdx) continue;
+ if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) {
+ m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+ m_sReoderingStatus.iPictInfoIndex = i;
+ }
+ }
+ }
+ if (m_sReoderingStatus.iMinPOC > IMinInt32) {
+ int32_t iLastPOC = pCtx != NULL ? pCtx->pSliceHeader->iPicOrderCntLsb : m_sPictInfoList[m_iLastBufferedIdx].iPOC;
+ bool isReady = (m_sReoderingStatus.iLastWrittenPOC > IMinInt32
+ && m_sReoderingStatus.iMinPOC - m_sReoderingStatus.iLastWrittenPOC <= 1)
+ || m_sReoderingStatus.iMinPOC < iLastPOC;
+ if (isReady) {
+ m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
+#if defined (_DEBUG)
+#ifdef _MOTION_VECTOR_DUMP_
+ fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC,
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp);
+#endif
+#endif
+ memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+ ppDst[0] = pDstInfo->pDst[0];
+ ppDst[1] = pDstInfo->pDst[1];
+ ppDst[2] = pDstInfo->pDst[2];
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
+ m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
+ m_sReoderingStatus.iMinPOC = IMinInt32;
+ --m_sReoderingStatus.iNumOfPicts;
+ }
}
+}
- return dsErrorFree;
+DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (PWelsDecoderContext pDecContext, unsigned char** ppDst,
+ SBufferInfo* pDstInfo) {
+ DECODING_STATE iRet = dsErrorFree;
+ if (pDstInfo->iBufferStatus == 1) {
+ m_bIsBaseline = pDecContext->pSps->uiProfileIdc == 66 || pDecContext->pSps->uiProfileIdc == 83;
+ if (!m_bIsBaseline) {
+ BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+ ReleaseBufferedReadyPicture (pDecContext, ppDst, pDstInfo);
+ }
+ }
+ return iRet;
}
-DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc,
- const int kiSrcLen,
- SParserBsInfo* pDstInfo) {
- if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc, const int kiSrcLen, SParserBsInfo* pDstInfo) {
+ PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+
+ if (pDecContext == NULL || pDecContext->pParam == NULL) {
if (m_pWelsTrace != NULL) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeParser without Initialize.\n");
}
return dsInitialOptExpected;
}
- if (!m_pDecContext->pParam->bParseOnly) {
+ if (!pDecContext->pParam->bParseOnly) {
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be true for this API calling! \n");
- m_pDecContext->iErrorCode |= dsInvalidArgument;
+ pDecContext->iErrorCode |= dsInvalidArgument;
return dsInvalidArgument;
}
int64_t iEnd, iStart = WelsTime();
- if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
- if (ResetDecoder())
+ if (CheckBsBuffer (pDecContext, kiSrcLen)) {
+ if (ResetDecoder (pDecContext))
return dsOutOfMemory;
return dsErrorFree;
@@ -871,58 +1265,57 @@ DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc,
WelsFflush (m_pFBS);
}
#endif//OUTPUT_BIT_STREAM
- m_pDecContext->bEndOfStreamFlag = false;
+ pDecContext->bEndOfStreamFlag = false;
} else {
//For application MODE, the error detection should be added for safe.
//But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
- m_pDecContext->bEndOfStreamFlag = true;
- m_pDecContext->bInstantDecFlag = true;
+ pDecContext->bEndOfStreamFlag = true;
+ pDecContext->bInstantDecFlag = true;
}
- m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
- m_pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
- m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
- if (!m_pDecContext->bFramePending) { //frame complete
- m_pDecContext->pParserBsInfo->iNalNum = 0;
- memset (m_pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
+ pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+ pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
+ pDecContext->iFeedbackNalRefIdc = -1; //initialize
+ if (!pDecContext->bFramePending) { //frame complete
+ pDecContext->pParserBsInfo->iNalNum = 0;
+ memset (pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
}
pDstInfo->iNalNum = 0;
pDstInfo->iSpsWidthInPixel = pDstInfo->iSpsHeightInPixel = 0;
if (pDstInfo) {
- m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+ pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
pDstInfo->uiOutBsTimeStamp = 0;
} else {
- m_pDecContext->uiTimeStamp = 0;
+ pDecContext->uiTimeStamp = 0;
}
- WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
- if (m_pDecContext->iErrorCode & dsOutOfMemory) {
- if (ResetDecoder())
+ WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
+ if (pDecContext->iErrorCode & dsOutOfMemory) {
+ if (ResetDecoder (pDecContext))
return dsOutOfMemory;
return dsErrorFree;
}
- if (!m_pDecContext->bFramePending && m_pDecContext->pParserBsInfo->iNalNum) {
- memcpy (pDstInfo, m_pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
+ if (!pDecContext->bFramePending && pDecContext->pParserBsInfo->iNalNum) {
+ memcpy (pDstInfo, pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
- if (m_pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
- m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
- if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
- ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
- m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+ if (pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
+ pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+ if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+ ResetDecStatNums (pDecContext->pDecoderStatistics);
+ pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
}
}
}
- m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
+ pDecContext->bInstantDecFlag = false; //reset no-delay flag
- if (m_pDecContext->iErrorCode && m_pDecContext->bPrintFrameErrorTraceFlag) {
- WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", m_pDecContext->iErrorCode);
- m_pDecContext->bPrintFrameErrorTraceFlag = false;
+ if (pDecContext->iErrorCode && pDecContext->bPrintFrameErrorTraceFlag) {
+ WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", pDecContext->iErrorCode);
+ pDecContext->bPrintFrameErrorTraceFlag = false;
}
iEnd = WelsTime();
- m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
-
- return (DECODING_STATE) m_pDecContext->iErrorCode;
+ pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+ return (DECODING_STATE)pDecContext->iErrorCode;
}
DECODING_STATE CWelsDecoder::DecodeFrame (const unsigned char* kpSrc,
@@ -964,6 +1357,100 @@ DECODING_STATE CWelsDecoder::DecodeFrameEx (const unsigned char* kpSrc,
return state;
}
+DECODING_STATE CWelsDecoder::ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx) {
+ sThreadCtx.pCtx->bHasNewSps = false;
+ sThreadCtx.pCtx->bParamSetsLostFlag = m_bParamSetsLostFlag;
+ sThreadCtx.pCtx->bFreezeOutput = m_bFreezeOutput;
+ sThreadCtx.pCtx->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp;
+ bool bPicBuffChanged = false;
+ if (m_pLastDecThrCtx != NULL && sThreadCtx.pCtx->sSpsPpsCtx.iSeqId < m_pLastDecThrCtx->pCtx->sSpsPpsCtx.iSeqId) {
+ CopySpsPps (m_pLastDecThrCtx->pCtx, sThreadCtx.pCtx);
+ sThreadCtx.pCtx->iPicQueueNumber = m_pLastDecThrCtx->pCtx->iPicQueueNumber;
+ if (sThreadCtx.pCtx->pPicBuff != m_pPicBuff) {
+ bPicBuffChanged = true;
+ sThreadCtx.pCtx->pPicBuff = m_pPicBuff;
+ sThreadCtx.pCtx->bHaveGotMemory = m_pPicBuff != NULL;
+ sThreadCtx.pCtx->iImgWidthInPixel = m_pLastDecThrCtx->pCtx->iImgWidthInPixel;
+ sThreadCtx.pCtx->iImgHeightInPixel = m_pLastDecThrCtx->pCtx->iImgHeightInPixel;
+ }
+ }
+
+ //if threadCount > 1, then each thread must contain exactly one complete frame.
+ if (GetThreadCount (sThreadCtx.pCtx) > 1) {
+ sThreadCtx.pCtx->pAccessUnitList->uiAvailUnitsNum = 0;
+ sThreadCtx.pCtx->pAccessUnitList->uiActualUnitsNum = 0;
+ }
+
+ int32_t iRet = DecodeFrame2WithCtx (sThreadCtx.pCtx, sThreadCtx.kpSrc, sThreadCtx.kiSrcLen, sThreadCtx.ppDst,
+ &sThreadCtx.sDstInfo);
+
+ int32_t iErr = InitConstructAccessUnit (sThreadCtx.pCtx, &sThreadCtx.sDstInfo);
+ if (ERR_NONE != iErr) {
+ return (DECODING_STATE) (iRet | iErr);
+ }
+ if (sThreadCtx.pCtx->bNewSeqBegin) {
+ m_pPicBuff = sThreadCtx.pCtx->pPicBuff;
+ } else if (bPicBuffChanged) {
+ InitialDqLayersContext (sThreadCtx.pCtx, sThreadCtx.pCtx->pSps->iMbWidth << 4, sThreadCtx.pCtx->pSps->iMbHeight << 4);
+ }
+ m_bParamSetsLostFlag = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bParamSetsLostFlag;
+ m_bFreezeOutput = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bFreezeOutput;
+ return (DECODING_STATE)iErr;
+}
+/*
+* Run decoding picture in separate thread.
+*/
+
+int CWelsDecoder::ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst,
+ SBufferInfo* pDstInfo) {
+ int state = dsErrorFree;
+ int32_t i, j;
+ int32_t signal = 0;
+
+ //serial using of threads
+ if (m_DecCtxActiveCount < m_iThreadCount) {
+ signal = m_DecCtxActiveCount;
+ } else {
+ signal = m_pDecThrCtxActive[0]->sThreadInfo.uiThrNum;
+ }
+
+ WAIT_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+
+ for (i = 0; i < m_DecCtxActiveCount; ++i) {
+ if (m_pDecThrCtxActive[i] == &m_pDecThrCtx[signal]) {
+ m_pDecThrCtxActive[i] = NULL;
+ for (j = i; j < m_DecCtxActiveCount - 1; j++) {
+ m_pDecThrCtxActive[j] = m_pDecThrCtxActive[j + 1];
+ m_pDecThrCtxActive[j + 1] = NULL;
+ }
+ --m_DecCtxActiveCount;
+ break;
+ }
+ }
+
+ m_pDecThrCtxActive[m_DecCtxActiveCount++] = &m_pDecThrCtx[signal];
+ if (m_pLastDecThrCtx != NULL) {
+ m_pDecThrCtx[signal].pCtx->pLastThreadCtx = m_pLastDecThrCtx;
+ }
+ m_pDecThrCtx[signal].kpSrc = const_cast<uint8_t*> (kpSrc);
+ m_pDecThrCtx[signal].kiSrcLen = kiSrcLen;
+ m_pDecThrCtx[signal].ppDst = ppDst;
+ memcpy (&m_pDecThrCtx[signal].sDstInfo, pDstInfo, sizeof (SBufferInfo));
+
+ ParseAccessUnit (m_pDecThrCtx[signal]);
+ if (m_iThreadCount > 1) {
+ m_pLastDecThrCtx = &m_pDecThrCtx[signal];
+ }
+ m_pDecThrCtx[signal].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN;
+ RELEASE_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsActivated);
+
+ // wait early picture
+ if (m_DecCtxActiveCount >= m_iThreadCount) {
+ WAIT_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+ RELEASE_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle);
+ }
+ return state;
+}
} // namespace WelsDec
diff --git a/chromium/third_party/openh264/src/codec/decoder/targets.mk b/chromium/third_party/openh264/src/codec/decoder/targets.mk
index eaf5d3c0780..88dc5afb123 100644
--- a/chromium/third_party/openh264/src/codec/decoder/targets.mk
+++ b/chromium/third_party/openh264/src/codec/decoder/targets.mk
@@ -22,6 +22,7 @@ DECODER_CPP_SRCS=\
$(DECODER_SRCDIR)/core/src/parse_mb_syn_cavlc.cpp\
$(DECODER_SRCDIR)/core/src/pic_queue.cpp\
$(DECODER_SRCDIR)/core/src/rec_mb.cpp\
+ $(DECODER_SRCDIR)/core/src/wels_decoder_thread.cpp\
$(DECODER_SRCDIR)/plus/src/welsDecoderExt.cpp\
DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.$(OBJ))
@@ -56,14 +57,24 @@ DECODER_OBJS += $(DECODER_OBJSARM64)
endif
OBJS += $(DECODER_OBJSARM64)
-DECODER_ASM_MIPS_SRCS=\
+DECODER_ASM_MIPS_MMI_SRCS=\
$(DECODER_SRCDIR)/core/mips/dct_mmi.c\
-DECODER_OBJSMIPS += $(DECODER_ASM_MIPS_SRCS:.c=.$(OBJ))
+DECODER_OBJSMIPS_MMI += $(DECODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+DECODER_ASM_MIPS_MSA_SRCS=\
+
+DECODER_OBJSMIPS_MSA += $(DECODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
-DECODER_OBJS += $(DECODER_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+DECODER_OBJS += $(DECODER_OBJSMIPS_MMI)
+endif
+ifeq ($(ENABLE_MSA), Yes)
+DECODER_OBJS += $(DECODER_OBJSMIPS_MSA)
+endif
endif
-OBJS += $(DECODER_OBJSMIPS)
+OBJS += $(DECODER_OBJSMIPS_MMI)
+OBJS += $(DECODER_OBJSMIPS_MSA)
OBJS += $(DECODER_OBJS)
diff --git a/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h b/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h
index a33830440d6..780b4df414d 100644
--- a/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h
+++ b/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h
@@ -365,7 +365,7 @@ typedef struct TagWelsSvcCodingParam: SEncParamExt {
uiIntraPeriod = ((uiIntraPeriod + uiGopSize - 1) / uiGopSize) * uiGopSize;
if (((pCodingParam.iNumRefFrame != AUTO_REF_PIC_COUNT)
- && ((pCodingParam.iNumRefFrame > MAX_REF_PIC_COUNT) || (pCodingParam.iNumRefFrame < MIN_REF_PIC_COUNT)))
+ && !((pCodingParam.iNumRefFrame > MAX_REF_PIC_COUNT) || (pCodingParam.iNumRefFrame < MIN_REF_PIC_COUNT)))
|| ((iNumRefFrame != AUTO_REF_PIC_COUNT) && (pCodingParam.iNumRefFrame == AUTO_REF_PIC_COUNT))) {
iNumRefFrame = pCodingParam.iNumRefFrame;
}
diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp
index ef2758cd6b3..a49df475203 100644
--- a/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp
+++ b/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp
@@ -134,7 +134,7 @@ static int32_t WelsCheckNumRefSetting (SLogContext* pLogCtx, SWelsSvcCodingParam
int32_t WelsCheckRefFrameLimitationNumRefFirst (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam) {
- if (WelsCheckNumRefSetting (pLogCtx, pParam, true)) {
+ if (WelsCheckNumRefSetting (pLogCtx, pParam, false)) {
// we take num-ref as the honored setting but it conflicts with temporal and LTR
return ENC_RETURN_UNSUPPORTED_PARA;
}
diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp
index aec6b111788..8fd00ea6119 100644
--- a/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp
+++ b/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp
@@ -783,6 +783,11 @@ void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu)
*pfSetNZCZero = WelsNonZeroCount_mmi;
}
#endif
+#if defined(HAVE_MSA)
+ if (iCpu & WELS_CPU_MSA) {
+ *pfSetNZCZero = WelsNonZeroCount_msa;
+ }
+#endif
}
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
@@ -860,6 +865,19 @@ void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
}
#endif//HAVE_MMI
+
+#if defined(HAVE_MSA)
+ if (iCpu & WELS_CPU_MSA) {
+ pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa;
+ pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa;
+ pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa;
+ pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa;
+ pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
+ pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
+ pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
+ pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
+ }
+#endif//HAVE_MSA
}
diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp
index 6f11f36ebf6..f9bc6c4768f 100644
--- a/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp
+++ b/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp
@@ -464,7 +464,7 @@ int32_t WelsHadamardQuant2x2Skip_AArch64_neon (int16_t* pRes, int16_t iFF, int1
void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
pFuncList->pfCopy8x8Aligned = WelsCopy8x8_c;
pFuncList->pfCopy16x16Aligned =
- pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c;
+ pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c;
pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_c;
pFuncList->pfCopy8x16Aligned = WelsCopy8x16_c;
pFuncList->pfCopy4x4 = WelsCopy4x4_c;
@@ -612,5 +612,16 @@ void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
pFuncList->pfDctFourT4 = WelsDctFourT4_mmi;
}
#endif//HAVE_MMI
+
+#if defined(HAVE_MSA)
+ if (uiCpuFlag & WELS_CPU_MSA) {
+ pFuncList->pfCopy8x8Aligned = WelsCopy8x8_msa;
+ pFuncList->pfCopy8x16Aligned = WelsCopy8x16_msa;
+
+ pFuncList->pfCopy16x16Aligned =
+ pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_msa;
+ pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_msa;
+ }
+#endif
}
}
diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp
index 9f79da89365..9bc6e103b73 100644
--- a/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp
+++ b/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp
@@ -374,12 +374,12 @@ int32_t ParamValidation (SLogContext* pLogCtx, SWelsSvcCodingParam* pCfg) {
pCfg->bEnableFrameSkip);
if ((pCfg->iMaxQp <= 0) || (pCfg->iMinQp <= 0)) {
if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) {
- WelsLog (pLogCtx, WELS_LOG_WARNING, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, MIN_SCREEN_QP,
+ WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, MIN_SCREEN_QP,
MAX_SCREEN_QP);
pCfg->iMinQp = MIN_SCREEN_QP;
pCfg->iMaxQp = MAX_SCREEN_QP;
} else {
- WelsLog (pLogCtx, WELS_LOG_WARNING, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp,
+ WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp,
GOM_MIN_QP_MODE, MAX_LOW_BR_QP);
pCfg->iMinQp = GOM_MIN_QP_MODE;
pCfg->iMaxQp = MAX_LOW_BR_QP;
diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp
index 22fcb792041..80ba25aa83b 100644
--- a/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp
+++ b/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp
@@ -628,7 +628,6 @@ bool WelsBuildRefList (sWelsEncCtx* pCtx, const int32_t iPOC, int32_t iBestLtrRe
WelsLog (& (pCtx->sLogCtx), WELS_LOG_DETAIL,
"WelsBuildRefList pCtx->uiTemporalId = %d,pRef->iFrameNum = %d,pRef->uiTemporalId = %d",
pCtx->uiTemporalId, pRef->iFrameNum, pRef->uiTemporalId);
- break;
}
}
}
diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp
index 0bb9f141fe8..90139136f91 100644
--- a/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp
@@ -101,7 +101,7 @@ void WelsSliceHeaderExtInit (sWelsEncCtx* pEncCtx, SDqLayer* pCurLayer, SSlice*
if (P_SLICE == pEncCtx->eSliceType) {
pCurSliceHeader->uiNumRefIdxL0Active = 1;
if (pCurSliceHeader->uiRefCount > 0 &&
- pCurSliceHeader->uiRefCount < pCurLayer->sLayerInfo.pSpsP->iNumRefFrames) {
+ pCurSliceHeader->uiRefCount <= pCurLayer->sLayerInfo.pSpsP->iNumRefFrames) {
pCurSliceHeader->bNumRefIdxActiveOverrideFlag = true;
pCurSliceHeader->uiNumRefIdxL0Active = pCurSliceHeader->uiRefCount;
}
diff --git a/chromium/third_party/openh264/src/codec/encoder/meson.build b/chromium/third_party/openh264/src/codec/encoder/meson.build
index 4e5c3314b6c..5f46854510b 100644
--- a/chromium/third_party/openh264/src/codec/encoder/meson.build
+++ b/chromium/third_party/openh264/src/codec/encoder/meson.build
@@ -33,19 +33,41 @@ cpp_sources = [
'plus/src/welsEncoderExt.cpp',
]
-asm_sources = [
- 'core/x86/coeff.asm',
- 'core/x86/dct.asm',
- 'core/x86/intra_pred.asm',
- 'core/x86/matrix_transpose.asm',
- 'core/x86/memzero.asm',
- 'core/x86/quant.asm',
- 'core/x86/sample_sc.asm',
- 'core/x86/score.asm',
-]
-
-objs_asm = asm_gen.process(asm_sources)
+objs_asm = []
+if ['x86', 'x86_64'].contains(cpu_family)
+ asm_sources = [
+ 'core/x86/coeff.asm',
+ 'core/x86/dct.asm',
+ 'core/x86/intra_pred.asm',
+ 'core/x86/matrix_transpose.asm',
+ 'core/x86/memzero.asm',
+ 'core/x86/quant.asm',
+ 'core/x86/sample_sc.asm',
+ 'core/x86/score.asm',
+ ]
+ objs_asm = asm_gen.process(asm_sources)
+elif cpu_family == 'arm'
+ cpp_sources += [
+ 'core/arm/intra_pred_neon.S',
+ 'core/arm/intra_pred_sad_3_opt_neon.S',
+ 'core/arm/memory_neon.S',
+ 'core/arm/pixel_neon.S',
+ 'core/arm/reconstruct_neon.S',
+ 'core/arm/svc_motion_estimation.S',
+ ]
+elif cpu_family == 'aarch64'
+ cpp_sources += [
+ 'core/arm64/intra_pred_aarch64_neon.S',
+ 'core/arm64/intra_pred_sad_3_opt_aarch64_neon.S',
+ 'core/arm64/memory_aarch64_neon.S',
+ 'core/arm64/pixel_aarch64_neon.S',
+ 'core/arm64/reconstruct_aarch64_neon.S',
+ 'core/arm64/svc_motion_estimation_aarch64_neon.S',
+ ]
+else
+ error('Unsupported cpu family @0@'.format(cpu_family))
+endif
libencoder = static_library('encoder', cpp_sources, objs_asm,
- include_directories: [inc, processing_inc, encoder_inc],
+ include_directories: [inc, processing_inc, encoder_inc, casm_inc],
dependencies: deps)
diff --git a/chromium/third_party/openh264/src/codec/encoder/targets.mk b/chromium/third_party/openh264/src/codec/encoder/targets.mk
index 1f053280e1e..4fb2e690ea4 100644
--- a/chromium/third_party/openh264/src/codec/encoder/targets.mk
+++ b/chromium/third_party/openh264/src/codec/encoder/targets.mk
@@ -82,16 +82,26 @@ ENCODER_OBJS += $(ENCODER_OBJSARM64)
endif
OBJS += $(ENCODER_OBJSARM64)
-ENCODER_ASM_MIPS_SRCS=\
+ENCODER_ASM_MIPS_MMI_SRCS=\
$(ENCODER_SRCDIR)/core/mips/dct_mmi.c\
$(ENCODER_SRCDIR)/core/mips/quant_mmi.c\
$(ENCODER_SRCDIR)/core/mips/score_mmi.c\
-ENCODER_OBJSMIPS += $(ENCODER_ASM_MIPS_SRCS:.c=.$(OBJ))
+ENCODER_OBJSMIPS_MMI += $(ENCODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+ENCODER_ASM_MIPS_MSA_SRCS=\
+
+ENCODER_OBJSMIPS_MSA += $(ENCODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
-ENCODER_OBJS += $(ENCODER_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+ENCODER_OBJS += $(ENCODER_OBJSMIPS_MMI)
+endif
+ifeq ($(ENABLE_MSA), Yes)
+ENCODER_OBJS += $(ENCODER_OBJSMIPS_MSA)
+endif
endif
-OBJS += $(ENCODER_OBJSMIPS)
+OBJS += $(ENCODER_OBJSMIPS_MMI)
+OBJS += $(ENCODER_OBJSMIPS_MSA)
OBJS += $(ENCODER_OBJS)
diff --git a/chromium/third_party/openh264/src/codec/meson.build b/chromium/third_party/openh264/src/codec/meson.build
index 7b610d24c4d..7a427f498df 100644
--- a/chromium/third_party/openh264/src/codec/meson.build
+++ b/chromium/third_party/openh264/src/codec/meson.build
@@ -2,5 +2,8 @@ subdir('common')
subdir('decoder')
subdir('encoder')
subdir('processing')
-subdir('console')
+if not ['android', 'ios'].contains(system)
+ # also disabled in the Makefile for these platforms
+ subdir('console')
+endif
subdir('api')
diff --git a/chromium/third_party/openh264/src/codec/processing/meson.build b/chromium/third_party/openh264/src/codec/processing/meson.build
index b7560e3d69a..d38dfb1f2ef 100644
--- a/chromium/third_party/openh264/src/codec/processing/meson.build
+++ b/chromium/third_party/openh264/src/codec/processing/meson.build
@@ -18,14 +18,32 @@ cpp_sources = [
'src/vaacalc/vaacalculation.cpp',
]
-asm_sources = [
- 'src/x86/denoisefilter.asm',
- 'src/x86/downsample_bilinear.asm',
- 'src/x86/vaa.asm',
-]
-
-objs_asm = asm_gen.process(asm_sources)
+objs_asm = []
+if ['x86', 'x86_64'].contains(cpu_family)
+ asm_sources = [
+ 'src/x86/denoisefilter.asm',
+ 'src/x86/downsample_bilinear.asm',
+ 'src/x86/vaa.asm',
+ ]
+ objs_asm = asm_gen.process(asm_sources)
+elif cpu_family == 'arm'
+ cpp_sources += [
+ 'src/arm/adaptive_quantization.S',
+ 'src/arm/down_sample_neon.S',
+ 'src/arm/pixel_sad_neon.S',
+ 'src/arm/vaa_calc_neon.S',
+ ]
+elif cpu_family == 'aarch64'
+ cpp_sources += [
+ 'src/arm64/adaptive_quantization_aarch64_neon.S',
+ 'src/arm64/down_sample_aarch64_neon.S',
+ 'src/arm64/pixel_sad_aarch64_neon.S',
+ 'src/arm64/vaa_calc_aarch64_neon.S',
+ ]
+else
+ error('Unsupported cpu family @0@'.format(cpu_family))
+endif
libprocessing = static_library('processing', cpp_sources, objs_asm,
- include_directories: [inc, processing_inc],
+ include_directories: [inc, processing_inc, casm_inc],
dependencies: deps)
diff --git a/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h b/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h
index ae146cff71b..78c225ee795 100644
--- a/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h
+++ b/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h
@@ -89,6 +89,12 @@ class CSceneChangeDetectorVideo {
}
#endif
+#ifdef HAVE_MMI
+ if (iCpuFlag & WELS_CPU_MMI) {
+ m_pfSad = WelsSampleSad8x8_mmi;
+ }
+#endif
+
m_fSceneChangeMotionRatioLarge = SCENE_CHANGE_MOTION_RATIO_LARGE_VIDEO;
m_fSceneChangeMotionRatioMedium = SCENE_CHANGE_MOTION_RATIO_MEDIUM;
}
diff --git a/chromium/third_party/openh264/src/codec/processing/targets.mk b/chromium/third_party/openh264/src/codec/processing/targets.mk
index 300de2d803b..0f8873335aa 100644
--- a/chromium/third_party/openh264/src/codec/processing/targets.mk
+++ b/chromium/third_party/openh264/src/codec/processing/targets.mk
@@ -58,14 +58,24 @@ PROCESSING_OBJS += $(PROCESSING_OBJSARM64)
endif
OBJS += $(PROCESSING_OBJSARM64)
-PROCESSING_ASM_MIPS_SRCS=\
+PROCESSING_ASM_MIPS_MMI_SRCS=\
$(PROCESSING_SRCDIR)/src/mips/vaa_mmi.c\
-PROCESSING_OBJSMIPS += $(PROCESSING_ASM_MIPS_SRCS:.c=.$(OBJ))
+PROCESSING_OBJSMIPS_MMI += $(PROCESSING_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+PROCESSING_ASM_MIPS_MSA_SRCS=\
+
+PROCESSING_OBJSMIPS_MSA += $(PROCESSING_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
-PROCESSING_OBJS += $(PROCESSING_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MMI)
+endif
+ifeq ($(ENABLE_MSA), Yes)
+PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MSA)
+endif
endif
-OBJS += $(PROCESSING_OBJSMIPS)
+OBJS += $(PROCESSING_OBJSMIPS_MMI)
+OBJS += $(PROCESSING_OBJSMIPS_MSA)
OBJS += $(PROCESSING_OBJS)
diff --git a/chromium/third_party/openh264/src/gmpopenh264.info b/chromium/third_party/openh264/src/gmpopenh264.info
index ad01420f0d9..7a666efb8be 100644
--- a/chromium/third_party/openh264/src/gmpopenh264.info
+++ b/chromium/third_party/openh264/src/gmpopenh264.info
@@ -1,4 +1,4 @@
Name: gmpopenh264
Description: GMP Plugin for OpenH264.
-Version: 1.8.0
+Version: 2.1.0
APIs: encode-video[h264], decode-video[h264]
diff --git a/chromium/third_party/openh264/src/include/wels/meson.build b/chromium/third_party/openh264/src/include/wels/meson.build
index 1b0049222a5..73fcef3acfe 100644
--- a/chromium/third_party/openh264/src/include/wels/meson.build
+++ b/chromium/third_party/openh264/src/include/wels/meson.build
@@ -4,5 +4,5 @@ foreach header : api_headers
api_header_deps += configure_file(
input : header[1],
output : header[0],
- configuration : configuration_data())
+ copy : true)
endforeach
diff --git a/chromium/third_party/openh264/src/meson.build b/chromium/third_party/openh264/src/meson.build
index c5793dbca68..a8692285743 100644
--- a/chromium/third_party/openh264/src/meson.build
+++ b/chromium/third_party/openh264/src/meson.build
@@ -1,10 +1,10 @@
project('openh264', ['c', 'cpp'],
- version : '1.8.0',
- meson_version : '>= 0.43',
+ version : '2.1.0',
+ meson_version : '>= 0.47',
default_options : [ 'warning_level=1',
'buildtype=debugoptimized' ])
-major_version = '4'
+major_version = '6'
cpp = meson.get_compiler('cpp')
@@ -36,8 +36,6 @@ encoder_inc = include_directories([
join_paths('codec', 'encoder', 'plus', 'inc'),
])
-asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '')
-
nasm = find_program('nasm', 'nasm.exe')
system = host_machine.system()
@@ -54,24 +52,52 @@ deps = [dependency('threads')]
c_args = []
cpp_args = []
asm_args = []
-
-if system == 'linux'
+asm_inc = []
+casm_inc = []
+cpp_lib = '-lstdc++'
+
+# TODO: should rely on dependency('threads') instead and change the pkg-config
+# generator below
+pthread_dep = cpp.find_library('pthread', required : false)
+libm_dep = cpp.find_library('m', required : false) # library name is passed without the 'lib' prefix, same as 'pthread'
+deps += [libm_dep]
+
+if ['linux', 'android', 'ios', 'darwin'].contains(system)
+ asm_format32 = 'elf'
+ asm_format64 = 'elf64'
+ if ['ios', 'darwin'].contains(system)
+ asm_format32 = 'macho32'
+ asm_format64 = 'macho64'
+ endif
if cpu_family == 'x86'
- asm_format = 'elf'
- asm_args += ['-DX86_32']
- add_project_arguments('-DX86_32_ASM', language: 'c')
+ asm_format = asm_format32
+ asm_args += ['-DX86_32', '-DHAVE_AVX2']
+ add_project_arguments('-DHAVE_AVX2', language: 'cpp')
+ add_project_arguments('-DHAVE_AVX2', '-DX86_ASM', '-DX86_32_ASM', language: 'c')
+ asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '')
elif cpu_family == 'x86_64'
- asm_format = 'elf64'
- asm_args += ['-DUNIX64']
+ asm_format = asm_format64
+ asm_args += ['-DUNIX64', '-DHAVE_AVX2']
+ add_project_arguments('-DHAVE_AVX2', language: 'cpp')
+ add_project_arguments('-DHAVE_AVX2', '-DX86_ASM', language: 'c')
+ asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '')
+ elif cpu_family == 'arm'
+ asm_format = asm_format32
+ add_project_arguments('-DHAVE_NEON', language: 'c')
+ add_project_arguments('-DHAVE_NEON', language: 'cpp') # C++ sources need the define too, as in the aarch64 branch
+ casm_inc = include_directories(join_paths('codec', 'common', 'arm'))
+ elif cpu_family == 'aarch64'
+ asm_format = asm_format64
+ add_project_arguments('-DHAVE_NEON_ARM64', language: 'c')
+ add_project_arguments('-DHAVE_NEON_ARM64', language: 'cpp')
+ casm_inc = include_directories(join_paths('codec', 'common', 'arm64'))
else
- error ('FIXME: unhandled CPU family @0@ for Linux'.format(cpu_family))
+ error ('FIXME: unhandled CPU family @0@ for @1@'.format(cpu_family, system))
endif
- deps += [cpp.find_library('libm')]
-
- asm_args += ['-DHAVE_AVX2']
- add_project_arguments('-DHAVE_AVX2', language: 'cpp')
- add_project_arguments('-DHAVE_AVX2', '-DX86_ASM', language: 'c')
+ if ['ios', 'darwin', 'android'].contains(system)
+ cpp_lib = '-lc++'
+ endif
elif system == 'windows'
if cpu_family == 'x86'
asm_format = 'win32'
@@ -82,17 +108,20 @@ elif system == 'windows'
else
error ('FIXME: unhandled CPU family @0@ for Windows'.format(cpu_family))
endif
+ asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '')
else
error ('FIXME: Unhandled system @0@'.format(system))
endif
-asm_gen = generator(nasm,
- output : '@BASENAME@.o',
- arguments : [
- '-f', asm_format,
- '-i', asm_inc,
- '@INPUT@',
- '-o', '@OUTPUT@'] + asm_args)
+if ['x86', 'x86_64'].contains(cpu_family)
+ asm_gen = generator(nasm,
+ output : '@BASENAME@.o',
+ arguments : [
+ '-f', asm_format,
+ '-i', asm_inc,
+ '@INPUT@',
+ '-o', '@OUTPUT@'] + asm_args)
+endif
api_headers = []
api_header_deps = []
@@ -112,6 +141,7 @@ libopenh264_shared = shared_library('openh264',
install: true,
soversion: major_version,
version: meson.project_version(),
+ vs_module_defs: 'openh264.def',
dependencies: deps)
libopenh264_static = static_library('openh264',
@@ -124,19 +154,23 @@ pkg_install_dir = '@0@/pkgconfig'.format(get_option('libdir'))
foreach t : ['', '-static']
pkgconf = configuration_data()
pkgconf.set('prefix', join_paths(get_option('prefix')))
+ pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir')))
pkgconf.set('VERSION', meson.project_version())
+ pkglibs = cpp_lib
+ if libm_dep.found()
+ pkglibs += ' -lm'
+ endif
+ if pthread_dep.found()
+ pkglibs += ' -lpthread'
+ endif
if t == '-static'
- do_install = false
- pkgconf.set('LIBS', '-lstdc++ -lpthread -lm')
+ pkgconf.set('LIBS', pkglibs)
pkgconf.set('LIBS_PRIVATE', '')
else
- do_install = true
pkgconf.set('LIBS', '')
- pkgconf.set('LIBS_PRIVATE', '-lstdc++ -lpthread -lm')
+ pkgconf.set('LIBS_PRIVATE', pkglibs)
endif
- message('do_install: @0@'.format(do_install))
-
configure_file(
input: 'openh264.pc.in',
output: 'openh264@0@.pc'.format(t),
diff --git a/chromium/third_party/openh264/src/meson_options.txt b/chromium/third_party/openh264/src/meson_options.txt
new file mode 100644
index 00000000000..a2c14d168b5
--- /dev/null
+++ b/chromium/third_party/openh264/src/meson_options.txt
@@ -0,0 +1 @@
+option('tests', type : 'feature', value : 'auto', yield : true)
diff --git a/chromium/third_party/openh264/src/openh264.pc.in b/chromium/third_party/openh264/src/openh264.pc.in
index 7fb5d0c13eb..f86225c0bca 100644
--- a/chromium/third_party/openh264/src/openh264.pc.in
+++ b/chromium/third_party/openh264/src/openh264.pc.in
@@ -1,5 +1,5 @@
prefix=@prefix@
-libdir=${prefix}/lib
+libdir=@libdir@
includedir=${prefix}/include
Name: OpenH264
diff --git a/chromium/third_party/openh264/src/openh264.rc b/chromium/third_party/openh264/src/openh264.rc
index d06a147066b..7ff7ad803b3 100644
--- a/chromium/third_party/openh264/src/openh264.rc
+++ b/chromium/third_party/openh264/src/openh264.rc
@@ -24,8 +24,8 @@ LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
//
VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,9,0,1806
- PRODUCTVERSION 1,9,0,1806
+ FILEVERSION 2,1,0,2002
+ PRODUCTVERSION 2,1,0,2002
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
@@ -43,12 +43,12 @@ BEGIN
VALUE "Comments", "Cisco OpenH264 codec"
VALUE "CompanyName", "Cisco Systems Inc."
VALUE "FileDescription", "Cisco OpenH264 codec"
- VALUE "FileVersion", "1.9.0.1806"
+ VALUE "FileVersion", "2.1.0.2002"
VALUE "InternalName", "openh264.dll"
VALUE "LegalCopyright", "© 2011-2015 Cisco and/or its affiliates. All rights reserved."
VALUE "OriginalFilename", "openh264.dll"
VALUE "ProductName", "Cisco OpenH264 codec"
- VALUE "ProductVersion", "1.9.0.1806"
+ VALUE "ProductVersion", "2.1.0.2002"
END
END
BLOCK "VarFileInfo"