author    Allan Sandfeld Jensen <allan.jensen@qt.io>    2018-01-29 16:35:13 +0100
committer Allan Sandfeld Jensen <allan.jensen@qt.io>    2018-02-01 15:33:35 +0000
commit    c8c2d1901aec01e934adf561a9fdf0cc776cdef8 (patch)
tree      9157c3d9815e5870799e070b113813bec53e0535 /chromium/third_party/libyuv
parent    abefd5095b41dac94ca451d784ab6e27372e981a (diff)
download  qtwebengine-chromium-c8c2d1901aec01e934adf561a9fdf0cc776cdef8.tar.gz
BASELINE: Update Chromium to 64.0.3282.139
Change-Id: I1cae68fe9c94ff7608b26b8382fc19862cdb293a
Reviewed-by: Alexandru Croitor <alexandru.croitor@qt.io>
Diffstat (limited to 'chromium/third_party/libyuv')
-rw-r--r-- chromium/third_party/libyuv/Android.bp | 135
-rw-r--r-- chromium/third_party/libyuv/BUILD.gn | 9
-rw-r--r-- chromium/third_party/libyuv/DEPS | 18
-rw-r--r-- chromium/third_party/libyuv/OWNERS | 9
-rw-r--r-- chromium/third_party/libyuv/README.chromium | 2
-rw-r--r-- chromium/third_party/libyuv/docs/environment_variables.md | 2
-rw-r--r-- chromium/third_party/libyuv/docs/formats.md | 7
-rw-r--r-- chromium/third_party/libyuv/docs/getting_started.md | 64
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/compare_row.h | 31
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/convert_from.h | 24
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/cpu_id.h | 18
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/planar_functions.h | 2
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/rotate_row.h | 4
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/row.h | 100
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/scale_row.h | 24
-rw-r--r-- chromium/third_party/libyuv/include/libyuv/version.h | 2
-rw-r--r-- chromium/third_party/libyuv/infra/config/OWNERS | 2
-rw-r--r-- chromium/third_party/libyuv/infra/config/cq.cfg | 8
-rw-r--r-- chromium/third_party/libyuv/linux.mk | 2
-rw-r--r-- chromium/third_party/libyuv/source/compare.cc | 22
-rw-r--r-- chromium/third_party/libyuv/source/compare_common.cc | 12
-rw-r--r-- chromium/third_party/libyuv/source/compare_gcc.cc | 208
-rw-r--r-- chromium/third_party/libyuv/source/compare_msa.cc | 2
-rw-r--r-- chromium/third_party/libyuv/source/compare_win.cc | 6
-rw-r--r-- chromium/third_party/libyuv/source/convert_from.cc | 40
-rw-r--r-- chromium/third_party/libyuv/source/cpu_id.cc | 17
-rw-r--r-- chromium/third_party/libyuv/source/mjpeg_validate.cc | 2
-rw-r--r-- chromium/third_party/libyuv/source/rotate_win.cc | 2
-rw-r--r-- chromium/third_party/libyuv/source/row_common.cc | 29
-rw-r--r-- chromium/third_party/libyuv/source/row_gcc.cc | 99
-rw-r--r-- chromium/third_party/libyuv/source/row_msa.cc | 29
-rw-r--r-- chromium/third_party/libyuv/source/row_neon64.cc | 36
-rw-r--r-- chromium/third_party/libyuv/source/scale.cc | 7
-rw-r--r-- chromium/third_party/libyuv/source/scale_msa.cc | 8
-rw-r--r-- chromium/third_party/libyuv/source/scale_win.cc | 2
-rw-r--r-- chromium/third_party/libyuv/tools_libyuv/OWNERS | 2
-rw-r--r-- chromium/third_party/libyuv/tools_libyuv/msan/OWNERS | 2
-rw-r--r-- chromium/third_party/libyuv/tools_libyuv/ubsan/OWNERS | 2
-rw-r--r-- chromium/third_party/libyuv/unit_test/color_test.cc | 48
-rw-r--r-- chromium/third_party/libyuv/unit_test/compare_test.cc | 120
-rw-r--r-- chromium/third_party/libyuv/unit_test/convert_test.cc | 7
-rw-r--r-- chromium/third_party/libyuv/unit_test/cpu_test.cc | 39
-rw-r--r-- chromium/third_party/libyuv/unit_test/planar_test.cc | 272
-rw-r--r-- chromium/third_party/libyuv/unit_test/scale_test.cc | 534
-rw-r--r-- chromium/third_party/libyuv/unit_test/unit_test.h | 16
-rw-r--r-- chromium/third_party/libyuv/util/cpuid.c | 21
46 files changed, 1465 insertions, 582 deletions
diff --git a/chromium/third_party/libyuv/Android.bp b/chromium/third_party/libyuv/Android.bp
new file mode 100644
index 00000000000..a3d8d834ac7
--- /dev/null
+++ b/chromium/third_party/libyuv/Android.bp
@@ -0,0 +1,135 @@
+cc_library {
+ name: "libyuv",
+ vendor_available: true,
+ vndk: {
+ enabled: true,
+ },
+
+ srcs: [
+ "source/compare.cc",
+ "source/compare_common.cc",
+ "source/compare_gcc.cc",
+ "source/compare_neon.cc",
+ "source/compare_neon64.cc",
+ "source/compare_msa.cc",
+ "source/convert.cc",
+ "source/convert_argb.cc",
+ "source/convert_from.cc",
+ "source/convert_from_argb.cc",
+ "source/convert_to_argb.cc",
+ "source/convert_to_i420.cc",
+ "source/cpu_id.cc",
+ "source/planar_functions.cc",
+ "source/rotate.cc",
+ "source/rotate_any.cc",
+ "source/rotate_argb.cc",
+ "source/rotate_common.cc",
+ "source/rotate_dspr2.cc",
+ "source/rotate_gcc.cc",
+ "source/rotate_msa.cc",
+ "source/rotate_neon.cc",
+ "source/rotate_neon64.cc",
+ "source/row_any.cc",
+ "source/row_common.cc",
+ "source/row_dspr2.cc",
+ "source/row_gcc.cc",
+ "source/row_msa.cc",
+ "source/row_neon.cc",
+ "source/row_neon64.cc",
+ "source/scale.cc",
+ "source/scale_any.cc",
+ "source/scale_argb.cc",
+ "source/scale_common.cc",
+ "source/scale_dspr2.cc",
+ "source/scale_gcc.cc",
+ "source/scale_msa.cc",
+ "source/scale_neon.cc",
+ "source/scale_neon64.cc",
+ "source/video_common.cc",
+
+ "source/convert_jpeg.cc",
+ "source/mjpeg_decoder.cc",
+ "source/mjpeg_validate.cc",
+ ],
+
+ cflags: [
+ "-Wall",
+ "-Werror",
+ "-Wno-unused-parameter",
+ "-fexceptions",
+ "-DHAVE_JPEG",
+ ],
+
+ shared_libs: ["libjpeg"],
+
+ export_include_dirs: ["include"],
+}
+
+// compatibilty static library until all uses of libyuv_static are replaced
+// with libyuv (b/37646797)
+cc_library_static {
+ name: "libyuv_static",
+ whole_static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "libyuv_unittest",
+ static_libs: ["libyuv"],
+ shared_libs: ["libjpeg"],
+ cflags: ["-Wall", "-Werror"],
+ srcs: [
+ "unit_test/unit_test.cc",
+ "unit_test/basictypes_test.cc",
+ "unit_test/color_test.cc",
+ "unit_test/compare_test.cc",
+ "unit_test/convert_test.cc",
+ "unit_test/cpu_test.cc",
+ "unit_test/cpu_thread_test.cc",
+ "unit_test/math_test.cc",
+ "unit_test/planar_test.cc",
+ "unit_test/rotate_argb_test.cc",
+ "unit_test/rotate_test.cc",
+ "unit_test/scale_argb_test.cc",
+ "unit_test/scale_test.cc",
+ "unit_test/video_common_test.cc",
+ ],
+}
+
+cc_test {
+ name: "compare",
+ gtest: false,
+ srcs: [
+ "util/compare.cc",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "cpuid",
+ gtest: false,
+ srcs: [
+ "util/cpuid.c",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "psnr",
+ gtest: false,
+ srcs: [
+ "util/psnr_main.cc",
+ "util/psnr.cc",
+ "util/ssim.cc",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "yuvconvert",
+ gtest: false,
+ srcs: [
+ "util/yuvconvert.cc",
+ ],
+ static_libs: ["libyuv"],
+ shared_libs: ["libjpeg"],
+}
diff --git a/chromium/third_party/libyuv/BUILD.gn b/chromium/third_party/libyuv/BUILD.gn
index 34a9975bf82..9badf08c846 100644
--- a/chromium/third_party/libyuv/BUILD.gn
+++ b/chromium/third_party/libyuv/BUILD.gn
@@ -158,9 +158,13 @@ static_library("libyuv_internal") {
}
# To enable AVX2 or other cpu optimization, pass flag here
- # cflags = [ "-mavx2", "-mpopcnt", "-mavx2", "-mfma" ]
if (!is_win) {
- cflags = [ "-ffp-contract=fast" ] # Enable fma vectorization for NEON.
+ cflags = [
+ # "-mpopcnt",
+ # "-mavx2",
+ # "-mfma",
+ "-ffp-contract=fast", # Enable fma vectorization for NEON.
+ ]
}
}
if (libyuv_use_neon) {
@@ -185,6 +189,7 @@ if (libyuv_use_neon) {
configs -= [ "//build/config/compiler:default_optimization" ]
# Enable optimize for speed (-O2) over size (-Os).
+ # TODO(fbarchard): Consider optimize_speed which is O3.
configs += [ "//build/config/compiler:optimize_max" ]
}
diff --git a/chromium/third_party/libyuv/DEPS b/chromium/third_party/libyuv/DEPS
index 852735e2c57..fdb133c7ac1 100644
--- a/chromium/third_party/libyuv/DEPS
+++ b/chromium/third_party/libyuv/DEPS
@@ -1,6 +1,6 @@
vars = {
'chromium_git': 'https://chromium.googlesource.com',
- 'chromium_revision': '3a3410e0eb66727afa4f2557954ecfbd9b230c83',
+ 'chromium_revision': 'ff3b31782d552b03104a6d831c7530605b52b13f',
'swarming_revision': '5e8001d9a710121ce7a68efd0804430a34b4f9e4',
# Three lines of non-changing comments so that
# the commit queue can handle CLs rolling lss
@@ -9,18 +9,18 @@ vars = {
# Three lines of non-changing comments so that
# the commit queue can handle CLs rolling catapult
# and whatever else without interference from each other.
- 'catapult_revision': '1a2a373481ce2376a7c2719802e1dbb6d0d83c6b',
+ 'catapult_revision': 'aa736cc76ee5e35215abcfb83a8c354f12d0c684',
}
deps = {
'src/build':
- Var('chromium_git') + '/chromium/src/build' + '@' + '800cde0e9ec6dfb73e64d33f28d3858fdeea63b9',
+ Var('chromium_git') + '/chromium/src/build' + '@' + '156ba982d749902e3403c242e23ded87fd316494',
'src/buildtools':
- Var('chromium_git') + '/chromium/buildtools.git' + '@' + 'cbc33b9c0a9d1bb913895a4319a742c504a2d541',
+ Var('chromium_git') + '/chromium/buildtools.git' + '@' + 'f6d165d9d842ddd29056c127a5f3a3c5d8e0d2e3',
'src/testing':
- Var('chromium_git') + '/chromium/src/testing' + '@' + '8ca56a609f13dfd7808d48162338c4d8c7ce8997',
+ Var('chromium_git') + '/chromium/src/testing' + '@' + 'cc96d3d66b5b9613fd0fe055509cfec5eb54b19c',
'src/third_party':
- Var('chromium_git') + '/chromium/src/third_party' + '@' + '5d202b9b7d090b8e92a3832a5257e8b94a44e1c9',
+ Var('chromium_git') + '/chromium/src/third_party' + '@' + '72c52c224cdd3c377f7caff8ffed0f5749e79549',
'src/third_party/catapult':
Var('chromium_git') + '/external/github.com/catapult-project/catapult.git' + '@' + Var('catapult_revision'),
'src/third_party/colorama/src':
@@ -32,7 +32,7 @@ deps = {
'src/third_party/yasm/source/patched-yasm':
Var('chromium_git') + '/chromium/deps/yasm/patched-yasm.git' + '@' + 'b98114e18d8b9b84586b10d24353ab8616d4c5fc',
'src/tools':
- Var('chromium_git') + '/chromium/src/tools' + '@' + '9069575d376649d4cfcec7333ba05315a8f7dd29',
+ Var('chromium_git') + '/chromium/src/tools' + '@' + 'eceb2c420b20350a2d2ba261953109280968647a',
'src/tools/gyp':
Var('chromium_git') + '/external/gyp.git' + '@' + 'd61a9397e668fa9843c4aa7da9e79460fe590bfb',
'src/tools/swarming_client':
@@ -50,7 +50,7 @@ deps = {
deps_os = {
'android': {
'src/base':
- Var('chromium_git') + '/chromium/src/base' + '@' + 'b120cdc8d849a20eb32129a7e8c02fb8491f3149',
+ Var('chromium_git') + '/chromium/src/base' + '@' + '9b543d487c7c38be191c6180001ff9ce186ae326',
'src/third_party/android_tools':
Var('chromium_git') + '/android_tools.git' + '@' + 'aadb2fed04af8606545b0afe4e3060bc1a15fad7',
'src/third_party/ced/src':
@@ -74,7 +74,7 @@ deps_os = {
},
'ios': {
'src/ios':
- Var('chromium_git') + '/chromium/src/ios' + '@' + 'a4c7e589665f97e9900ee806bd8afa34b7e21d11',
+ Var('chromium_git') + '/chromium/src/ios' + '@' + '39c4b2fcf73f5b1e82af3b9c57267c17217d6a30',
},
'unix': {
'src/third_party/lss':
diff --git a/chromium/third_party/libyuv/OWNERS b/chromium/third_party/libyuv/OWNERS
index 2db52d30797..7b21adfe6c7 100644
--- a/chromium/third_party/libyuv/OWNERS
+++ b/chromium/third_party/libyuv/OWNERS
@@ -1,13 +1,8 @@
fbarchard@chromium.org
magjed@chromium.org
-torbjorng@chromium.org
-per-file *.gyp=kjellander@chromium.org
-per-file *.gn=kjellander@chromium.org
+per-file *.gn=phoglund@chromium.org
per-file .gitignore=*
per-file AUTHORS=*
per-file DEPS=*
-per-file PRESUBMIT.py=kjellander@chromium.org
-per-file gyp_libyuv.py=kjellander@chromium.org
-per-file setup_links.py=*
-per-file sync_chromium.py=kjellander@chromium.org
+per-file PRESUBMIT.py=phoglund@chromium.org
diff --git a/chromium/third_party/libyuv/README.chromium b/chromium/third_party/libyuv/README.chromium
index 3de0df132c9..88c069734ac 100644
--- a/chromium/third_party/libyuv/README.chromium
+++ b/chromium/third_party/libyuv/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1670
+Version: 1678
License: BSD
License File: LICENSE
diff --git a/chromium/third_party/libyuv/docs/environment_variables.md b/chromium/third_party/libyuv/docs/environment_variables.md
index 5802599e9d3..9071c54de23 100644
--- a/chromium/third_party/libyuv/docs/environment_variables.md
+++ b/chromium/third_party/libyuv/docs/environment_variables.md
@@ -14,7 +14,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
LIBYUV_DISABLE_SSE42
LIBYUV_DISABLE_AVX
LIBYUV_DISABLE_AVX2
- LIBYUV_DISABLE_AVX3
+ LIBYUV_DISABLE_AVX512BW
LIBYUV_DISABLE_ERMS
LIBYUV_DISABLE_FMA3
LIBYUV_DISABLE_DSPR2
diff --git a/chromium/third_party/libyuv/docs/formats.md b/chromium/third_party/libyuv/docs/formats.md
index cddfe027e2b..2b75d31ac75 100644
--- a/chromium/third_party/libyuv/docs/formats.md
+++ b/chromium/third_party/libyuv/docs/formats.md
@@ -138,3 +138,10 @@ Some are channel order agnostic (e.g. ARGBScale).
Some functions are symmetric (e.g. ARGBToBGRA is the same as BGRAToARGB, so its a macro).
ARGBBlend expects preattenuated ARGB. The R,G,B are premultiplied by alpha. Other functions don't care.
+
+# RGB24 and RAW
+
+There are 2 RGB layouts - RGB24 (aka 24BG) and RAW
+
+RGB24 is B,G,R in memory
+RAW is R,G,B in memory
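
Despite the names, RGB24 (FOURCC 24BG) stores bytes as B,G,R while RAW stores them as R,G,B. A minimal, illustrative C++ sketch of the byte order for a single pure-red pixel (example values only, not from the libyuv sources):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // One red pixel (R=0xFF, G=0x00, B=0x00) in each 3-byte-per-pixel layout.
      const uint8_t rgb24[3] = {0x00, 0x00, 0xFF};  // RGB24 / 24BG: B, G, R in memory
      const uint8_t raw[3]   = {0xFF, 0x00, 0x00};  // RAW: R, G, B in memory
      std::printf("RGB24: %02X %02X %02X  RAW: %02X %02X %02X\n",
                  rgb24[0], rgb24[1], rgb24[2], raw[0], raw[1], raw[2]);
      return 0;
    }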
diff --git a/chromium/third_party/libyuv/docs/getting_started.md b/chromium/third_party/libyuv/docs/getting_started.md
index be0b9a9d7a6..58e05f3cbcb 100644
--- a/chromium/third_party/libyuv/docs/getting_started.md
+++ b/chromium/third_party/libyuv/docs/getting_started.md
@@ -62,30 +62,15 @@ To get just the source (not buildable):
### Windows
- call gn gen out/Release "--args=is_debug=false target_cpu=\"x86\""
- call gn gen out/Debug "--args=is_debug=true target_cpu=\"x86\""
- ninja -v -C out/Release
- ninja -v -C out/Debug
-
- call gn gen out/Release "--args=is_debug=false target_cpu=\"x64\""
- call gn gen out/Debug "--args=is_debug=true target_cpu=\"x64\""
- ninja -v -C out/Release
- ninja -v -C out/Debug
-
-#### Building with clang-cl
-
- set GYP_DEFINES=clang=1 target_arch=ia32
- call python tools\clang\scripts\update.py
-
- call gn gen out/Release "--args=is_debug=false is_official_build=false is_clang=true target_cpu=\"x86\""
- call gn gen out/Debug "--args=is_debug=true is_official_build=false is_clang=true target_cpu=\"x86\""
- ninja -v -C out/Release
- ninja -v -C out/Debug
+ call gn gen out\Release "--args=is_debug=false target_cpu=\"x64\""
+ call gn gen out\Debug "--args=is_debug=true target_cpu=\"x64\""
+ ninja -v -C out\Release
+ ninja -v -C out\Debug
- call gn gen out/Release "--args=is_debug=false is_official_build=false is_clang=true target_cpu=\"x64\""
- call gn gen out/Debug "--args=is_debug=true is_official_build=false is_clang=true target_cpu=\"x64\""
- ninja -v -C out/Release
- ninja -v -C out/Debug
+ call gn gen out\Release "--args=is_debug=false target_cpu=\"x86\""
+ call gn gen out\Debug "--args=is_debug=true target_cpu=\"x86\""
+ ninja -v -C out\Release
+ ninja -v -C out\Debug
### macOS and Linux
@@ -123,17 +108,17 @@ https://code.google.com/p/chromium/wiki/AndroidBuildInstructions
Add to .gclient last line: `target_os=['android'];`
-armv7
+arm64
- gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"arm\""
- gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"arm\""
+ gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"arm64\""
+ gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"arm64\""
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
-arm64
+armv7
- gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"arm64\""
- gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"arm64\""
+ gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"arm\""
+ gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"arm\""
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
@@ -181,7 +166,7 @@ Running test with C code:
ninja -C out/Debug libyuv
ninja -C out/Debug libyuv_unittest
ninja -C out/Debug compare
- ninja -C out/Debug convert
+ ninja -C out/Debug yuvconvert
ninja -C out/Debug psnr
ninja -C out/Debug cpuid
@@ -251,16 +236,11 @@ See also https://www.ccoderun.ca/programming/2015-12-20_CrossCompiling/index.htm
out\Release\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter="*"
-### OSX
-
- out/Release/libyuv_unittest --gtest_filter="*"
-
-### Linux
+### macOS and Linux
out/Release/libyuv_unittest --gtest_filter="*"
-Replace --gtest_filter="*" with specific unittest to run. May include wildcards. e.g.
-
+Replace --gtest_filter="*" with specific unittest to run. May include wildcards.
out/Release/libyuv_unittest --gtest_filter=*I420ToARGB_Opt
## CPU Emulator tools
@@ -275,12 +255,20 @@ Then run:
~/intelsde/sde -skx -- out/Release/libyuv_unittest --gtest_filter=**I420ToARGB_Opt
+### Intel Architecture Code Analyzer
+
+Inset these 2 macros into assembly code to be analyzed:
+ IACA_ASM_START
+ IACA_ASM_END
+Build the code as usual, then run iaca on the object file.
+ ~/iaca-lin64/bin/iaca.sh -reduceout -arch HSW out/Release/obj/libyuv_internal/compare_gcc.o
+
## Sanitizers
gn gen out/Release "--args=is_debug=false is_msan=true"
ninja -v -C out/Release
- Sanitizers available: asan, msan, tsan, ubsan, lsan, ubsan_vptr
+Sanitizers available: asan, msan, tsan, ubsan, lsan, ubsan_vptr
### Running Dr Memory memcheck for Windows
diff --git a/chromium/third_party/libyuv/include/libyuv/compare_row.h b/chromium/third_party/libyuv/include/libyuv/compare_row.h
index 9316dc22bf5..2e5ebe508d1 100644
--- a/chromium/third_party/libyuv/include/libyuv/compare_row.h
+++ b/chromium/third_party/libyuv/include/libyuv/compare_row.h
@@ -19,7 +19,7 @@ extern "C" {
#endif
#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
@@ -42,13 +42,7 @@ extern "C" {
#endif // clang >= 3.4
#endif // __clang__
-// clang 6 mips issue https://bugs.chromium.org/p/libyuv/issues/detail?id=715
-// broken in clang version 6.0.0 (trunk 308728)
-// fixed in clang version 6.0.0 (trunk 310694)
-#if defined(__clang__)
-// #define DISABLE_CLANG_MSA 1
-#endif
-
+// The following are available for Visual C:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_HASHDJB2_AVX2
@@ -59,7 +53,7 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
#define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2
-#define HAS_HAMMINGDISTANCE_X86
+#define HAS_HAMMINGDISTANCE_SSE42
#endif
// The following are available for Visual C and clangcl 32 bit:
@@ -69,6 +63,18 @@ extern "C" {
#define HAS_SUMSQUAREERROR_AVX2
#endif
+// The following are available for GCC and clangcl 64 bit:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+#define HAS_HAMMINGDISTANCE_SSSE3
+#endif
+
+// The following are available for GCC and clangcl 64 bit:
+#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+#define HAS_HAMMINGDISTANCE_AVX2
+#endif
+
// The following are available for Neon:
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
@@ -78,14 +84,13 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_HAMMINGDISTANCE_MSA
-
-#ifndef DISABLE_CLANG_MSA
#define HAS_SUMSQUAREERROR_MSA
#endif
-#endif
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count);
-uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count);
+uint32 HammingDistance_SSE42(const uint8* src_a, const uint8* src_b, int count);
+uint32 HammingDistance_SSSE3(const uint8* src_a, const uint8* src_b, int count);
+uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count);
diff --git a/chromium/third_party/libyuv/include/libyuv/convert_from.h b/chromium/third_party/libyuv/include/libyuv/convert_from.h
index a050e4457f3..237f68f57aa 100644
--- a/chromium/third_party/libyuv/include/libyuv/convert_from.h
+++ b/chromium/third_party/libyuv/include/libyuv/convert_from.h
@@ -189,6 +189,30 @@ int I420ToRAW(const uint8* src_y,
int height);
LIBYUV_API
+int H420ToRGB24(const uint8* src_y,
+ int src_stride_y,
+ const uint8* src_u,
+ int src_stride_u,
+ const uint8* src_v,
+ int src_stride_v,
+ uint8* dst_frame,
+ int dst_stride_frame,
+ int width,
+ int height);
+
+LIBYUV_API
+int H420ToRAW(const uint8* src_y,
+ int src_stride_y,
+ const uint8* src_u,
+ int src_stride_u,
+ const uint8* src_v,
+ int src_stride_v,
+ uint8* dst_frame,
+ int dst_stride_frame,
+ int width,
+ int height);
+
+LIBYUV_API
int I420ToRGB565(const uint8* src_y,
int src_stride_y,
const uint8* src_u,
diff --git a/chromium/third_party/libyuv/include/libyuv/cpu_id.h b/chromium/third_party/libyuv/include/libyuv/cpu_id.h
index 6d1afbdbb12..c2e9bbbd954 100644
--- a/chromium/third_party/libyuv/include/libyuv/cpu_id.h
+++ b/chromium/third_party/libyuv/include/libyuv/cpu_id.h
@@ -36,15 +36,19 @@ static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
-static const int kCpuHasAVX3 = 0x2000;
-static const int kCpuHasF16C = 0x4000;
-
-// 0x8000 reserved for future X86 flags.
+static const int kCpuHasF16C = 0x2000;
+static const int kCpuHasGFNI = 0x4000;
+static const int kCpuHasAVX512BW = 0x8000;
+static const int kCpuHasAVX512VL = 0x10000;
+static const int kCpuHasAVX512VBMI = 0x20000;
+static const int kCpuHasAVX512VBMI2 = 0x40000;
+static const int kCpuHasAVX512VBITALG = 0x80000;
+static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
// These flags are only valid on MIPS processors.
-static const int kCpuHasMIPS = 0x10000;
-static const int kCpuHasDSPR2 = 0x20000;
-static const int kCpuHasMSA = 0x40000;
+static const int kCpuHasMIPS = 0x200000;
+static const int kCpuHasDSPR2 = 0x400000;
+static const int kCpuHasMSA = 0x800000;
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
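
The flag constants above are single bits in the value returned by InitCpuFlags()/TestCpuFlag(), so callers gate optimized code paths at runtime. A minimal usage sketch with the renumbered flags (illustrative example; the printed strings are arbitrary):

    #include <cstdio>
    #include "libyuv/cpu_id.h"

    int main() {
      // TestCpuFlag() auto-initializes and returns non-zero if the feature is present.
      if (libyuv::TestCpuFlag(libyuv::kCpuHasAVX512BW) &&
          libyuv::TestCpuFlag(libyuv::kCpuHasAVX512VL)) {
        std::printf("AVX512BW+VL code paths usable\n");
      } else if (libyuv::TestCpuFlag(libyuv::kCpuHasAVX2)) {
        std::printf("AVX2 code paths usable\n");
      } else {
        std::printf("SSE2/C code paths\n");
      }
      return 0;
    }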
diff --git a/chromium/third_party/libyuv/include/libyuv/planar_functions.h b/chromium/third_party/libyuv/include/libyuv/planar_functions.h
index d97965cb88d..c91501a9c2c 100644
--- a/chromium/third_party/libyuv/include/libyuv/planar_functions.h
+++ b/chromium/third_party/libyuv/include/libyuv/planar_functions.h
@@ -746,7 +746,7 @@ int I420Interpolate(const uint8* src0_y,
int interpolation);
#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
diff --git a/chromium/third_party/libyuv/include/libyuv/rotate_row.h b/chromium/third_party/libyuv/include/libyuv/rotate_row.h
index 2c51584eee8..973fc15284f 100644
--- a/chromium/third_party/libyuv/include/libyuv/rotate_row.h
+++ b/chromium/third_party/libyuv/include/libyuv/rotate_row.h
@@ -19,7 +19,7 @@ extern "C" {
#endif
#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
@@ -29,7 +29,7 @@ extern "C" {
#endif
#endif
// The following are available for Visual C and clangcl 32 bit:
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
diff --git a/chromium/third_party/libyuv/include/libyuv/row.h b/chromium/third_party/libyuv/include/libyuv/row.h
index 0b1e017448f..34d727641a8 100644
--- a/chromium/third_party/libyuv/include/libyuv/row.h
+++ b/chromium/third_party/libyuv/include/libyuv/row.h
@@ -31,7 +31,7 @@ extern "C" {
var = 0
#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
@@ -75,13 +75,6 @@ extern "C" {
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
-// clang 6 mips issue https://bugs.chromium.org/p/libyuv/issues/detail?id=715
-// broken in clang version 6.0.0 (trunk 308728)
-// fixed in clang version 6.0.0 (trunk 310694)
-#if defined(__clang__)
-// #define DISABLE_CLANG_MSA 1
-#endif
-
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
@@ -271,7 +264,7 @@ extern "C" {
#define HAS_I422TOARGBROW_SSSE3
#endif
-// The following are available forr gcc/clang x86 platforms:
+// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
@@ -279,6 +272,15 @@ extern "C" {
#define HAS_SPLITRGBROW_SSSE3
#endif
+// The following are available for AVX2 gcc/clang x86 platforms:
+// TODO(fbarchard): Port to Visual C
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
+ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+#define HAS_MERGEUVROW_16_AVX2
+#define HAS_MULTIPLYROW_16_AVX2
+#endif
+
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
@@ -406,12 +408,23 @@ extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#define HAS_ABGRTOUVROW_MSA
+#define HAS_ABGRTOYROW_MSA
+#define HAS_ARGB1555TOARGBROW_MSA
+#define HAS_ARGB1555TOUVROW_MSA
+#define HAS_ARGB1555TOYROW_MSA
#define HAS_ARGB4444TOARGBROW_MSA
#define HAS_ARGBADDROW_MSA
#define HAS_ARGBATTENUATEROW_MSA
+#define HAS_ARGBBLENDROW_MSA
+#define HAS_ARGBCOLORMATRIXROW_MSA
+#define HAS_ARGBEXTRACTALPHAROW_MSA
#define HAS_ARGBGRAYROW_MSA
#define HAS_ARGBMIRRORROW_MSA
#define HAS_ARGBMULTIPLYROW_MSA
+#define HAS_ARGBQUANTIZEROW_MSA
+#define HAS_ARGBSEPIAROW_MSA
+#define HAS_ARGBSETROW_MSA
#define HAS_ARGBSHADEROW_MSA
#define HAS_ARGBSHUFFLEROW_MSA
#define HAS_ARGBSUBTRACTROW_MSA
@@ -419,33 +432,8 @@ extern "C" {
#define HAS_ARGBTOARGB4444ROW_MSA
#define HAS_ARGBTORAWROW_MSA
#define HAS_ARGBTORGB24ROW_MSA
-#define HAS_ARGBTORGB565ROW_MSA
-#define HAS_I422TOUYVYROW_MSA
-#define HAS_I422TOYUY2ROW_MSA
-#define HAS_INTERPOLATEROW_MSA
-#define HAS_MERGEUVROW_MSA
-#define HAS_MIRRORROW_MSA
-#define HAS_RAWTORGB24ROW_MSA
-#define HAS_SOBELTOPLANEROW_MSA
-#define HAS_UYVYTOUVROW_MSA
-#define HAS_UYVYTOYROW_MSA
-#define HAS_YUY2TOUV422ROW_MSA
-#define HAS_YUY2TOUVROW_MSA
-#define HAS_YUY2TOYROW_MSA
-#define HAS_ARGBEXTRACTALPHAROW_MSA
-#define HAS_SPLITUVROW_MSA
-#define HAS_MIRRORUVROW_MSA
-#define HAS_HALFFLOATROW_MSA
-
-#ifndef DISABLE_CLANG_MSA
-#define HAS_ABGRTOUVROW_MSA
-#define HAS_ABGRTOYROW_MSA
-#define HAS_ARGB1555TOARGBROW_MSA
-#define HAS_ARGB1555TOUVROW_MSA
-#define HAS_ARGB1555TOYROW_MSA
-#define HAS_ARGBSEPIAROW_MSA
-#define HAS_ARGBSETROW_MSA
#define HAS_ARGBTORGB565DITHERROW_MSA
+#define HAS_ARGBTORGB565ROW_MSA
#define HAS_ARGBTOUV444ROW_MSA
#define HAS_ARGBTOUVJROW_MSA
#define HAS_ARGBTOUVROW_MSA
@@ -453,17 +441,25 @@ extern "C" {
#define HAS_ARGBTOYROW_MSA
#define HAS_BGRATOUVROW_MSA
#define HAS_BGRATOYROW_MSA
+#define HAS_HALFFLOATROW_MSA
#define HAS_I400TOARGBROW_MSA
#define HAS_I422ALPHATOARGBROW_MSA
#define HAS_I422TOARGBROW_MSA
#define HAS_I422TORGB24ROW_MSA
#define HAS_I422TORGBAROW_MSA
+#define HAS_I422TOUYVYROW_MSA
+#define HAS_I422TOYUY2ROW_MSA
#define HAS_I444TOARGBROW_MSA
+#define HAS_INTERPOLATEROW_MSA
#define HAS_J400TOARGBROW_MSA
+#define HAS_MERGEUVROW_MSA
+#define HAS_MIRRORROW_MSA
+#define HAS_MIRRORUVROW_MSA
#define HAS_NV12TOARGBROW_MSA
#define HAS_NV12TORGB565ROW_MSA
#define HAS_NV21TOARGBROW_MSA
#define HAS_RAWTOARGBROW_MSA
+#define HAS_RAWTORGB24ROW_MSA
#define HAS_RAWTOUVROW_MSA
#define HAS_RAWTOYROW_MSA
#define HAS_RGB24TOARGBROW_MSA
@@ -474,17 +470,20 @@ extern "C" {
#define HAS_RGB565TOYROW_MSA
#define HAS_RGBATOUVROW_MSA
#define HAS_RGBATOYROW_MSA
+#define HAS_SETROW_MSA
#define HAS_SOBELROW_MSA
+#define HAS_SOBELTOPLANEROW_MSA
+#define HAS_SOBELXROW_MSA
#define HAS_SOBELXYROW_MSA
+#define HAS_SOBELYROW_MSA
+#define HAS_SPLITUVROW_MSA
#define HAS_UYVYTOARGBROW_MSA
+#define HAS_UYVYTOUVROW_MSA
+#define HAS_UYVYTOYROW_MSA
#define HAS_YUY2TOARGBROW_MSA
-#define HAS_ARGBBLENDROW_MSA
-#define HAS_ARGBQUANTIZEROW_MSA
-#define HAS_ARGBCOLORMATRIXROW_MSA
-#define HAS_SETROW_MSA
-#define HAS_SOBELXROW_MSA
-#define HAS_SOBELYROW_MSA
-#endif
+#define HAS_YUY2TOUV422ROW_MSA
+#define HAS_YUY2TOUVROW_MSA
+#define HAS_YUY2TOYROW_MSA
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
@@ -1523,6 +1522,23 @@ void MergeRGBRow_Any_NEON(const uint8* src_r,
uint8* dst_rgb,
int width);
+void MergeUVRow_16_C(const uint16* src_u,
+ const uint16* src_v,
+ uint16* dst_uv,
+ int scale, /* 64 for 10 bit */
+ int width);
+void MergeUVRow_16_AVX2(const uint16* src_u,
+ const uint16* src_v,
+ uint16* dst_uv,
+ int scale,
+ int width);
+
+void MultiplyRow_16_AVX2(const uint16* src_y,
+ uint16* dst_y,
+ int scale,
+ int width);
+void MultiplyRow_16_C(const uint16* src_y, uint16* dst_y, int scale, int width);
+
void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
void CopyRow_AVX(const uint8* src, uint8* dst, int count);
void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
diff --git a/chromium/third_party/libyuv/include/libyuv/scale_row.h b/chromium/third_party/libyuv/include/libyuv/scale_row.h
index ebafac4f679..c4a66aa07b1 100644
--- a/chromium/third_party/libyuv/include/libyuv/scale_row.h
+++ b/chromium/third_party/libyuv/include/libyuv/scale_row.h
@@ -20,7 +20,7 @@ extern "C" {
#endif
#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
@@ -50,13 +50,6 @@ extern "C" {
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
-// clang 6 mips issue https://bugs.chromium.org/p/libyuv/issues/detail?id=715
-// broken in clang version 6.0.0 (trunk 308728)
-// fixed in clang version 6.0.0 (trunk 310694)
-#if defined(__clang__)
-// #define DISABLE_CLANG_MSA 1
-#endif
-
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
@@ -112,19 +105,16 @@ extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
-#define HAS_SCALEARGBROWDOWN2_MSA
-#define HAS_SCALEROWDOWN2_MSA
-#define HAS_SCALEROWDOWN4_MSA
#define HAS_SCALEADDROW_MSA
#define HAS_SCALEARGBCOLS_MSA
-#define HAS_SCALEROWDOWN34_MSA
-
-#ifndef DISABLE_CLANG_MSA
+#define HAS_SCALEARGBFILTERCOLS_MSA
+#define HAS_SCALEARGBROWDOWN2_MSA
#define HAS_SCALEARGBROWDOWNEVEN_MSA
-#define HAS_SCALEROWDOWN38_MSA
#define HAS_SCALEFILTERCOLS_MSA
-#define HAS_SCALEARGBFILTERCOLS_MSA
-#endif
+#define HAS_SCALEROWDOWN2_MSA
+#define HAS_SCALEROWDOWN34_MSA
+#define HAS_SCALEROWDOWN38_MSA
+#define HAS_SCALEROWDOWN4_MSA
#endif
// Scale ARGB vertically with bilinear interpolation.
diff --git a/chromium/third_party/libyuv/include/libyuv/version.h b/chromium/third_party/libyuv/include/libyuv/version.h
index f27ccfbb0c8..838c70f1349 100644
--- a/chromium/third_party/libyuv/include/libyuv/version.h
+++ b/chromium/third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1670
+#define LIBYUV_VERSION 1678
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/chromium/third_party/libyuv/infra/config/OWNERS b/chromium/third_party/libyuv/infra/config/OWNERS
index 02eccd5eb50..b61b29d6c25 100644
--- a/chromium/third_party/libyuv/infra/config/OWNERS
+++ b/chromium/third_party/libyuv/infra/config/OWNERS
@@ -1,3 +1,3 @@
set noparent
agable@chromium.org
-kjellander@chromium.org
+phoglund@chromium.org
diff --git a/chromium/third_party/libyuv/infra/config/cq.cfg b/chromium/third_party/libyuv/infra/config/cq.cfg
index 345723e17bc..604de7814d9 100644
--- a/chromium/third_party/libyuv/infra/config/cq.cfg
+++ b/chromium/third_party/libyuv/infra/config/cq.cfg
@@ -7,16 +7,8 @@ cq_status_url: "https://chromium-cq-status.appspot.com"
git_repo_url: "https://chromium.googlesource.com/libyuv/libyuv.git"
gerrit {}
-rietveld {
- url: "https://codereview.chromium.org"
-}
-
verifiers {
- reviewer_lgtm {
- committer_list: "project-libyuv-committers"
- dry_run_access_list: "project-libyuv-tryjob-access"
- }
gerrit_cq_ability {
committer_list: "project-libyuv-committers"
dry_run_access_list: "project-libyuv-tryjob-access"
diff --git a/chromium/third_party/libyuv/linux.mk b/chromium/third_party/libyuv/linux.mk
index 1dd527c7570..7e9aa5e4e8b 100644
--- a/chromium/third_party/libyuv/linux.mk
+++ b/chromium/third_party/libyuv/linux.mk
@@ -80,4 +80,4 @@ cpuid: util/cpuid.c libyuv.a
$(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a
clean:
- /bin/rm -f source/*.o *.ii *.s libyuv.a convert cpuid psnr
+ /bin/rm -f source/*.o *.ii *.s libyuv.a yuvconvert cpuid psnr
diff --git a/chromium/third_party/libyuv/source/compare.cc b/chromium/third_party/libyuv/source/compare.cc
index 20afa0cef36..8c379b59cb8 100644
--- a/chromium/third_party/libyuv/source/compare.cc
+++ b/chromium/third_party/libyuv/source/compare.cc
@@ -110,12 +110,17 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
return fourcc;
}
+// NEON version accumulates in 16 bit shorts which overflow at 65536 bytes.
+// So actual maximum is 1 less loop, which is 64436 - 32 bytes.
+
LIBYUV_API
uint64 ComputeHammingDistance(const uint8* src_a,
const uint8* src_b,
int count) {
- const int kBlockSize = 65536;
- int remainder = count & (kBlockSize - 1) & ~31;
+ const int kBlockSize = 1 << 15; // 32768;
+ const int kSimdSize = 64;
+ // SIMD for multiple of 64, and C for remainder
+ int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
uint64 diff = 0;
int i;
uint32 (*HammingDistance)(const uint8* src_a, const uint8* src_b, int count) =
@@ -125,9 +130,14 @@ uint64 ComputeHammingDistance(const uint8* src_a,
HammingDistance = HammingDistance_NEON;
}
#endif
-#if defined(HAS_HAMMINGDISTANCE_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- HammingDistance = HammingDistance_X86;
+#if defined(HAS_HAMMINGDISTANCE_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ HammingDistance = HammingDistance_SSSE3;
+ }
+#endif
+#if defined(HAS_HAMMINGDISTANCE_SSE42)
+ if (TestCpuFlag(kCpuHasSSE42)) {
+ HammingDistance = HammingDistance_SSE42;
}
#endif
#if defined(HAS_HAMMINGDISTANCE_AVX2)
@@ -153,7 +163,7 @@ uint64 ComputeHammingDistance(const uint8* src_a,
src_a += remainder;
src_b += remainder;
}
- remainder = count & 31;
+ remainder = count & (kSimdSize - 1);
if (remainder) {
diff += HammingDistance_C(src_a, src_b, remainder);
}
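
The rewritten loop above caps each SIMD call at kBlockSize bytes so the 16-bit accumulators in the NEON/SSE kernels cannot overflow, hands the largest multiple-of-kSimdSize portion of the tail to the same kernel, and leaves the final few bytes to HammingDistance_C. A worked example of that decomposition (the count value is hypothetical; the constants match the patch):

    #include <cstdio>

    int main() {
      const int kBlockSize = 1 << 15;  // 32768, as in the patch
      const int kSimdSize = 64;
      const int count = 100000;        // hypothetical buffer size
      int full_blocks = count / kBlockSize;                         // 3 blocks of 32768 bytes
      int simd_tail = count & (kBlockSize - 1) & ~(kSimdSize - 1);  // 1664 bytes, still SIMD
      int c_tail = count & (kSimdSize - 1);                         // 32 bytes for HammingDistance_C
      std::printf("%d full blocks, %d-byte SIMD tail, %d-byte C tail\n",
                  full_blocks, simd_tail, c_tail);
      return 0;
    }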
diff --git a/chromium/third_party/libyuv/source/compare_common.cc b/chromium/third_party/libyuv/source/compare_common.cc
index d3e46fb502b..83564a1bcb5 100644
--- a/chromium/third_party/libyuv/source/compare_common.cc
+++ b/chromium/third_party/libyuv/source/compare_common.cc
@@ -18,7 +18,7 @@ extern "C" {
#endif
#if ORIGINAL_OPT
-uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
+uint32 HammingDistance_C1(const uint8* src_a, const uint8* src_b, int count) {
uint32 diff = 0u;
int i;
@@ -58,6 +58,16 @@ uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
src_a += 4;
src_b += 4;
}
+
+ for (; i < count; ++i) {
+ uint32 x = *src_a ^ *src_b;
+ uint32 u = x - ((x >> 1) & 0x55);
+ u = ((u >> 2) & 0x33) + (u & 0x33);
+ diff += (u + (u >> 4)) & 0x0f;
+ src_a += 1;
+ src_b += 1;
+ }
+
return diff;
}
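
The new scalar tail above folds each XOR'd byte with a SWAR bit-count; the SSE42 kernels in compare_gcc.cc below use the popcnt instruction, while the SSSE3/AVX2 kernels split each byte into nibbles and look the counts up in a 16-entry table (kBitCount) via pshufb. A scalar C++ sketch of that nibble-table scheme (illustrative only):

    #include <cstdint>
    #include <cstdio>

    // Bit counts for nibble values 0..15, mirroring the kBitCount vector used below.
    static const uint8_t kNibblePopCount[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                                1, 2, 2, 3, 2, 3, 3, 4};

    static uint32_t HammingDistanceNibbleLUT(const uint8_t* a, const uint8_t* b, int count) {
      uint32_t diff = 0;
      for (int i = 0; i < count; ++i) {
        uint8_t x = a[i] ^ b[i];
        diff += kNibblePopCount[x & 0x0f] + kNibblePopCount[x >> 4];  // low + high nibble
      }
      return diff;
    }

    int main() {
      const uint8_t a[4] = {0xff, 0x00, 0xf0, 0x81};
      const uint8_t b[4] = {0x00, 0x00, 0x0f, 0x80};
      std::printf("%u\n", HammingDistanceNibbleLUT(a, b, 4));  // prints 17
      return 0;
    }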
diff --git a/chromium/third_party/libyuv/source/compare_gcc.cc b/chromium/third_party/libyuv/source/compare_gcc.cc
index 994fb10fd59..595c8ec4ae2 100644
--- a/chromium/third_party/libyuv/source/compare_gcc.cc
+++ b/chromium/third_party/libyuv/source/compare_gcc.cc
@@ -22,18 +22,210 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
-uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
+#if defined(__x86_64__)
+uint32 HammingDistance_SSE42(const uint8* src_a,
+ const uint8* src_b,
+ int count) {
+ uint64 diff = 0u;
+
+ asm volatile(
+ "xor %3,%3 \n"
+ "xor %%r8,%%r8 \n"
+ "xor %%r9,%%r9 \n"
+ "xor %%r10,%%r10 \n"
+
+ // Process 32 bytes per loop.
+ LABELALIGN
+ "1: \n"
+ "mov (%0),%%rcx \n"
+ "mov 0x8(%0),%%rdx \n"
+ "xor (%1),%%rcx \n"
+ "xor 0x8(%1),%%rdx \n"
+ "popcnt %%rcx,%%rcx \n"
+ "popcnt %%rdx,%%rdx \n"
+ "mov 0x10(%0),%%rsi \n"
+ "mov 0x18(%0),%%rdi \n"
+ "xor 0x10(%1),%%rsi \n"
+ "xor 0x18(%1),%%rdi \n"
+ "popcnt %%rsi,%%rsi \n"
+ "popcnt %%rdi,%%rdi \n"
+ "add $0x20,%0 \n"
+ "add $0x20,%1 \n"
+ "add %%rcx,%3 \n"
+ "add %%rdx,%%r8 \n"
+ "add %%rsi,%%r9 \n"
+ "add %%rdi,%%r10 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+
+ "add %%r8, %3 \n"
+ "add %%r9, %3 \n"
+ "add %%r10, %3 \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=r"(diff) // %3
+ :
+ : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
+
+ return static_cast<uint32>(diff);
+}
+#else
+uint32 HammingDistance_SSE42(const uint8* src_a,
+ const uint8* src_b,
+ int count) {
+ uint32 diff = 0u;
+
+ asm volatile(
+ // Process 16 bytes per loop.
+ LABELALIGN
+ "1: \n"
+ "mov (%0),%%ecx \n"
+ "mov 0x4(%0),%%edx \n"
+ "xor (%1),%%ecx \n"
+ "xor 0x4(%1),%%edx \n"
+ "popcnt %%ecx,%%ecx \n"
+ "add %%ecx,%3 \n"
+ "popcnt %%edx,%%edx \n"
+ "add %%edx,%3 \n"
+ "mov 0x8(%0),%%ecx \n"
+ "mov 0xc(%0),%%edx \n"
+ "xor 0x8(%1),%%ecx \n"
+ "xor 0xc(%1),%%edx \n"
+ "popcnt %%ecx,%%ecx \n"
+ "add %%ecx,%3 \n"
+ "popcnt %%edx,%%edx \n"
+ "add %%edx,%3 \n"
+ "add $0x10,%0 \n"
+ "add $0x10,%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "+r"(diff) // %3
+ :
+ : "memory", "cc", "ecx", "edx");
+
+ return diff;
+}
+#endif
+
+static vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15};
+static vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
+
+uint32 HammingDistance_SSSE3(const uint8* src_a,
+ const uint8* src_b,
+ int count) {
+ uint32 diff = 0u;
+
+ asm volatile(
+ "movdqa %4,%%xmm2 \n"
+ "movdqa %5,%%xmm3 \n"
+ "pxor %%xmm0,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqa (%0),%%xmm4 \n"
+ "movdqa 0x10(%0), %%xmm5 \n"
+ "pxor (%0,%1), %%xmm4 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "pand %%xmm2,%%xmm6 \n"
+ "psrlw $0x4,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "pshufb %%xmm6,%%xmm7 \n"
+ "pand %%xmm2,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm6 \n"
+ "pshufb %%xmm4,%%xmm6 \n"
+ "paddb %%xmm7,%%xmm6 \n"
+ "pxor 0x10(%0,%1),%%xmm5 \n"
+ "add $0x20,%0 \n"
+ "movdqa %%xmm5,%%xmm4 \n"
+ "pand %%xmm2,%%xmm5 \n"
+ "psrlw $0x4,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "pshufb %%xmm5,%%xmm7 \n"
+ "pand %%xmm2,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm5 \n"
+ "pshufb %%xmm4,%%xmm5 \n"
+ "paddb %%xmm7,%%xmm5 \n"
+ "paddb %%xmm5,%%xmm6 \n"
+ "psadbw %%xmm1,%%xmm6 \n"
+ "paddd %%xmm6,%%xmm0 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+
+ "pshufd $0xaa,%%xmm0,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "movd %%xmm0, %3 \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=r"(diff) // %3
+ : "m"(kNibbleMask), // %4
+ "m"(kBitCount) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+
+ return diff;
+}
+
+#ifdef HAS_HAMMINGDISTANCE_AVX2
+uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
uint32 diff = 0u;
- int i;
- for (i = 0; i < count - 7; i += 8) {
- uint64 x = *((uint64*)src_a) ^ *((uint64*)src_b);
- src_a += 8;
- src_b += 8;
- diff += __builtin_popcountll(x);
- }
+ asm volatile(
+ "vbroadcastf128 %4,%%ymm2 \n"
+ "vbroadcastf128 %5,%%ymm3 \n"
+ "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpxor %%ymm1,%%ymm1,%%ymm1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqa (%0),%%ymm4 \n"
+ "vmovdqa 0x20(%0), %%ymm5 \n"
+ "vpxor (%0,%1), %%ymm4, %%ymm4 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm6 \n"
+ "vpsrlw $0x4,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm6,%%ymm3,%%ymm6 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm4,%%ymm3,%%ymm4 \n"
+ "vpaddb %%ymm4,%%ymm6,%%ymm6 \n"
+ "vpxor 0x20(%0,%1),%%ymm5,%%ymm4 \n"
+ "add $0x40,%0 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm5 \n"
+ "vpsrlw $0x4,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm5,%%ymm3,%%ymm5 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm4,%%ymm3,%%ymm4 \n"
+ "vpaddb %%ymm5,%%ymm4,%%ymm4 \n"
+ "vpaddb %%ymm6,%%ymm4,%%ymm4 \n"
+ "vpsadbw %%ymm1,%%ymm4,%%ymm4 \n"
+ "vpaddd %%ymm0,%%ymm4,%%ymm0 \n"
+ "sub $0x40,%2 \n"
+ "jg 1b \n"
+
+ "vpermq $0xb1,%%ymm0,%%ymm1 \n"
+ "vpaddd %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xaa,%%ymm0,%%ymm1 \n"
+ "vpaddd %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovd %%xmm0, %3 \n"
+ "vzeroupper \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=r"(diff) // %3
+ : "m"(kNibbleMask), // %4
+ "m"(kBitCount) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+
return diff;
}
+#endif // HAS_HAMMINGDISTANCE_AVX2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
diff --git a/chromium/third_party/libyuv/source/compare_msa.cc b/chromium/third_party/libyuv/source/compare_msa.cc
index d5db4452a71..57857cf5127 100644
--- a/chromium/third_party/libyuv/source/compare_msa.cc
+++ b/chromium/third_party/libyuv/source/compare_msa.cc
@@ -47,7 +47,6 @@ uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count) {
return diff;
}
-#ifndef DISABLE_CLANG_MSA
uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse = 0u;
int i;
@@ -85,7 +84,6 @@ uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count) {
sse += (uint32)__msa_copy_u_w((v4i32)tmp0, 2);
return sse;
}
-#endif
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libyuv/source/compare_win.cc b/chromium/third_party/libyuv/source/compare_win.cc
index 5ca59178dbc..bcd6a88ebbb 100644
--- a/chromium/third_party/libyuv/source/compare_win.cc
+++ b/chromium/third_party/libyuv/source/compare_win.cc
@@ -23,9 +23,11 @@ extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
+uint32 HammingDistance_SSE42(const uint8* src_a,
+ const uint8* src_b,
+ int count) {
uint32 diff = 0u;
int i;
diff --git a/chromium/third_party/libyuv/source/convert_from.cc b/chromium/third_party/libyuv/source/convert_from.cc
index d623731d9ff..0f52f9ef9e0 100644
--- a/chromium/third_party/libyuv/source/convert_from.cc
+++ b/chromium/third_party/libyuv/source/convert_from.cc
@@ -657,6 +657,42 @@ int I420ToRAW(const uint8* src_y,
width, height);
}
+// Convert H420 to RGB24.
+LIBYUV_API
+int H420ToRGB24(const uint8* src_y,
+ int src_stride_y,
+ const uint8* src_u,
+ int src_stride_u,
+ const uint8* src_v,
+ int src_stride_v,
+ uint8* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ &kYuvH709Constants, width, height);
+}
+
+// Convert H420 to RAW.
+LIBYUV_API
+int H420ToRAW(const uint8* src_y,
+ int src_stride_y,
+ const uint8* src_u,
+ int src_stride_u,
+ const uint8* src_v,
+ int src_stride_v,
+ uint8* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_raw, dst_stride_raw,
+ &kYvuH709Constants, // Use Yvu matrix
+ width, height);
+}
+
// Convert I420 to ARGB1555.
LIBYUV_API
int I420ToARGB1555(const uint8* src_y,
@@ -1075,8 +1111,8 @@ int I420ToRGB565Dither(const uint8* src_y,
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
- *(uint32*)(dither4x4 + ((y & 3) << 2)),
- width); // NOLINT
+ *(uint32*)(dither4x4 + ((y & 3) << 2)), // NOLINT
+ width); // NOLINT
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
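
The two new H420 entry points above are thin wrappers over I420ToRGB24Matrix: H420ToRGB24 selects the BT.709 coefficients, and H420ToRAW additionally swaps the U and V plane arguments and uses the YVU matrix so the shared row kernels emit R,G,B byte order. A minimal caller sketch (frame size and fill values are hypothetical):

    #include <cstdint>
    #include <vector>
    #include "libyuv/convert_from.h"

    int main() {
      const int width = 64, height = 48;                        // hypothetical frame size
      std::vector<uint8_t> y(width * height, 128);              // flat grey test pattern
      std::vector<uint8_t> u((width / 2) * (height / 2), 128);
      std::vector<uint8_t> v((width / 2) * (height / 2), 128);
      std::vector<uint8_t> rgb24(width * height * 3);

      // H420 = I420 memory layout with BT.709 (HD) color coefficients.
      libyuv::H420ToRGB24(y.data(), width, u.data(), width / 2, v.data(), width / 2,
                          rgb24.data(), width * 3, width, height);
      return 0;
    }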
diff --git a/chromium/third_party/libyuv/source/cpu_id.cc b/chromium/third_party/libyuv/source/cpu_id.cc
index 12e03345b9f..344f3c06a2b 100644
--- a/chromium/third_party/libyuv/source/cpu_id.cc
+++ b/chromium/third_party/libyuv/source/cpu_id.cc
@@ -124,7 +124,7 @@ void CpuId(int eax, int ecx, int* cpu_info) {
int GetXCR0() {
int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
- xcr0 = (int) _xgetbv(0); // VS2010 SP1 required. NOLINT
+ xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT
#elif defined(__i386__) || defined(__x86_64__)
asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
#endif // defined(__i386__) || defined(__x86_64__)
@@ -242,10 +242,17 @@ static SAFEBUFFERS int GetCpuFlags(void) {
// Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) {
- cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
+ cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
+ cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
+ cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
+ cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
+ cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
+ cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
+ cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
}
}
+ // TODO(fbarchard): Consider moving these to gtest
// Environment variable overrides for testing.
if (TestEnv("LIBYUV_DISABLE_X86")) {
cpu_info &= ~kCpuHasX86;
@@ -274,12 +281,12 @@ static SAFEBUFFERS int GetCpuFlags(void) {
if (TestEnv("LIBYUV_DISABLE_FMA3")) {
cpu_info &= ~kCpuHasFMA3;
}
- if (TestEnv("LIBYUV_DISABLE_AVX3")) {
- cpu_info &= ~kCpuHasAVX3;
- }
if (TestEnv("LIBYUV_DISABLE_F16C")) {
cpu_info &= ~kCpuHasF16C;
}
+ if (TestEnv("LIBYUV_DISABLE_AVX512BW")) {
+ cpu_info &= ~kCpuHasAVX512BW;
+ }
#endif
#if defined(__mips__) && defined(__linux__)
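
For reference, the masks added above test standard CPUID leaf 7 (EAX=7, ECX=0) feature bits: cpu_info7[1] is EBX (bit 30 AVX512BW, bit 31 AVX512VL) and cpu_info7[2] is ECX (bit 1 VBMI, bit 6 VBMI2, bit 8 GFNI, bit 12 BITALG, bit 14 VPOPCNTDQ). A standalone sketch that prints the same bits, assuming a GCC/Clang toolchain that provides <cpuid.h>; unlike GetCpuFlags it skips the XGETBV check for OS-enabled AVX-512 state:

    #include <cpuid.h>
    #include <cstdio>

    int main() {
      unsigned eax, ebx, ecx, edx;
      if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        std::printf("AVX512BW  %u\n", (ebx >> 30) & 1);
        std::printf("AVX512VL  %u\n", (ebx >> 31) & 1);
        std::printf("VBMI      %u\n", (ecx >> 1) & 1);
        std::printf("VBMI2     %u\n", (ecx >> 6) & 1);
        std::printf("GFNI      %u\n", (ecx >> 8) & 1);
        std::printf("BITALG    %u\n", (ecx >> 12) & 1);
        std::printf("VPOPCNTDQ %u\n", (ecx >> 14) & 1);
      }
      return 0;
    }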
diff --git a/chromium/third_party/libyuv/source/mjpeg_validate.cc b/chromium/third_party/libyuv/source/mjpeg_validate.cc
index 1a17dd7216b..bd760425359 100644
--- a/chromium/third_party/libyuv/source/mjpeg_validate.cc
+++ b/chromium/third_party/libyuv/source/mjpeg_validate.cc
@@ -24,7 +24,7 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
const uint8* it = sample;
while (it < end) {
// TODO(fbarchard): scan for 0xd9 instead.
- it = static_cast<const uint8*>(memchr(it, 0xff, end - it));
+ it = (const uint8*)(memchr(it, 0xff, end - it));
if (it == NULL) {
break;
}
diff --git a/chromium/third_party/libyuv/source/rotate_win.cc b/chromium/third_party/libyuv/source/rotate_win.cc
index ee523a0b913..fb052f65212 100644
--- a/chromium/third_party/libyuv/source/rotate_win.cc
+++ b/chromium/third_party/libyuv/source/rotate_win.cc
@@ -17,7 +17,7 @@ extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
__declspec(naked) void TransposeWx8_SSSE3(const uint8* src,
int src_stride,
diff --git a/chromium/third_party/libyuv/source/row_common.cc b/chromium/third_party/libyuv/source/row_common.cc
index 2d01a789b6a..6ffc4febbf6 100644
--- a/chromium/third_party/libyuv/source/row_common.cc
+++ b/chromium/third_party/libyuv/source/row_common.cc
@@ -1798,6 +1798,35 @@ void MergeRGBRow_C(const uint8* src_r,
}
}
+void MergeUVRow_16_C(const uint16* src_u,
+ const uint16* src_v,
+ uint16* dst_uv,
+ int scale,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_uv[0] = src_u[x] * scale;
+ dst_uv[1] = src_v[x] * scale;
+ dst_uv[2] = src_u[x + 1] * scale;
+ dst_uv[3] = src_v[x + 1] * scale;
+ dst_uv += 4;
+ }
+ if (width & 1) {
+ dst_uv[0] = src_u[width - 1] * scale;
+ dst_uv[1] = src_v[width - 1] * scale;
+ }
+}
+
+void MultiplyRow_16_C(const uint16* src_y,
+ uint16* dst_y,
+ int scale,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_y[x] = src_y[x] * scale;
+ }
+}
+
void CopyRow_C(const uint8* src, uint8* dst, int count) {
memcpy(dst, src, count);
}
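
MergeUVRow_16_C and MultiplyRow_16_C above promote lsb-aligned high-bit-depth samples to msb-aligned 16-bit by plain multiplication; the AVX2 versions in row_gcc.cc below document the scale values (128 for 9-bit, 64 for 10-bit, 16 for 12-bit, 1 for 16-bit input). A small worked example for a 10-bit sample (illustrative):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A 10-bit sample sits in bits [0,9]; multiplying by 64 = 2^(16-10)
      // shifts it to bits [6,15], i.e. msb-aligned 16-bit, which is what the
      // scale argument of MergeUVRow_16 / MultiplyRow_16 does per element.
      const int scale = 64;
      const uint16_t sample10 = 0x03FF;                                   // max 10-bit value
      const uint16_t sample16 = static_cast<uint16_t>(sample10 * scale);  // 0xFFC0
      std::printf("0x%04X -> 0x%04X\n", sample10, sample16);
      return 0;
    }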
diff --git a/chromium/third_party/libyuv/source/row_gcc.cc b/chromium/third_party/libyuv/source/row_gcc.cc
index 86f0880be2a..b5c2e65c938 100644
--- a/chromium/third_party/libyuv/source/row_gcc.cc
+++ b/chromium/third_party/libyuv/source/row_gcc.cc
@@ -2753,6 +2753,87 @@ void MergeUVRow_SSE2(const uint8* src_u,
}
#endif // HAS_MERGEUVROW_SSE2
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 128 = 9 bits
+// 64 = 10 bits
+// 16 = 12 bits
+// 1 = 16 bits
+#ifdef HAS_MERGEUVROW_16_AVX2
+void MergeUVRow_16_AVX2(const uint16* src_u,
+ const uint16* src_v,
+ uint16* dst_uv,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "vmovd %4,%%xmm3 \n"
+ "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "sub %0,%1 \n"
+
+ // 16 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu (%0,%1,1),%%ymm1 \n"
+ "add $0x20,%0 \n"
+
+ "vpmullw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpmullw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates
+ "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm2,(%2) \n"
+ "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
+ "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
+ "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
+ "add $0x40,%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ : "r"(scale) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+ // clang-format on
+}
+#endif // HAS_MERGEUVROW_AVX2
+
+#ifdef HAS_MULTIPLYROW_16_AVX2
+void MultiplyRow_16_AVX2(const uint16* src_y,
+ uint16* dst_y,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "vmovd %3,%%xmm3 \n"
+ "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "sub %0,%1 \n"
+
+ // 16 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpmullw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpmullw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%0,%1) \n"
+ "vmovdqu %%ymm1,0x20(%0,%1) \n"
+ "add $0x40,%0 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "r"(scale) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm3");
+ // clang-format on
+}
+#endif // HAS_MULTIPLYROW_16_AVX2
+
#ifdef HAS_SPLITRGBROW_SSSE3
// Shuffle table for converting RGB to Planar.
@@ -5645,6 +5726,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
#ifdef HAS_HALFFLOATROW_SSE2
static float kScaleBias = 1.9259299444e-34f;
void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
+ scale *= kScaleBias;
asm volatile (
"pshufd $0x0,%3,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"
@@ -5671,7 +5753,11 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
- : "x"(scale * kScaleBias) // %3
+#if defined(__x86_64__)
+ : "x"(scale) // %3
+#else
+ : "m"(scale) // %3
+#endif
: "memory", "cc",
"xmm2", "xmm3", "xmm4", "xmm5"
);
@@ -5680,6 +5766,7 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
#ifdef HAS_HALFFLOATROW_AVX2
void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
+ scale *= kScaleBias;
asm volatile (
"vbroadcastss %3, %%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
@@ -5707,7 +5794,11 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
- : "x"(scale * kScaleBias) // %3
+#if defined(__x86_64__)
+ : "x"(scale) // %3
+#else
+ : "m"(scale) // %3
+#endif
: "memory", "cc",
"xmm2", "xmm3", "xmm4", "xmm5"
);
@@ -5740,7 +5831,11 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
+#if defined(__x86_64__)
: "x"(scale) // %3
+#else
+ : "m"(scale) // %3
+#endif
: "memory", "cc",
"xmm2", "xmm3", "xmm4"
);
diff --git a/chromium/third_party/libyuv/source/row_msa.cc b/chromium/third_party/libyuv/source/row_msa.cc
index 89fc248f880..5cc23450a52 100644
--- a/chromium/third_party/libyuv/source/row_msa.cc
+++ b/chromium/third_party/libyuv/source/row_msa.cc
@@ -16,12 +16,6 @@
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include "libyuv/macros_msa.h"
-// caveat - as of clang 6, some functions do not build under clang.
-// The macro DISABLE_CLANG_MSA is set for clang builds to disable
-// affected functions.
-// __msa_fill_w() is one affected intrinsic.
-// See Also: https://bugs.chromium.org/p/libyuv/issues/detail?id=715
-
#ifdef __cplusplus
namespace libyuv {
extern "C" {
@@ -373,7 +367,6 @@ void I422ToUYVYRow_MSA(const uint8* src_y,
}
}
-#ifndef DISABLE_CLANG_MSA
void I422ToARGBRow_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
@@ -644,7 +637,6 @@ void I422ToARGB1555Row_MSA(const uint8* src_y,
dst_argb1555 += 16;
}
}
-#endif
void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {
int x;
@@ -776,8 +768,6 @@ void UYVYToUV422Row_MSA(const uint8* src_uyvy,
}
}
-#ifndef DISABLE_CLANG_MSA
-
void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
@@ -941,7 +931,6 @@ void ARGBToUVRow_MSA(const uint8* src_argb0,
dst_v += 16;
}
}
-#endif
void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
int x;
@@ -1088,7 +1077,6 @@ void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-#ifndef DISABLE_CLANG_MSA
void ARGBToUV444Row_MSA(const uint8* src_argb,
uint8* dst_u,
uint8* dst_v,
@@ -1160,7 +1148,6 @@ void ARGBToUV444Row_MSA(const uint8* src_argb,
dst_v += 16;
}
}
-#endif
void ARGBMultiplyRow_MSA(const uint8* src_argb0,
const uint8* src_argb1,
@@ -1308,7 +1295,6 @@ void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) {
}
}
-#ifndef DISABLE_CLANG_MSA
void ARGBToRGB565DitherRow_MSA(const uint8* src_argb,
uint8* dst_rgb,
uint32 dither4,
@@ -1352,7 +1338,6 @@ void ARGBToRGB565DitherRow_MSA(const uint8* src_argb,
dst_rgb += 16;
}
}
-#endif
void ARGBShuffleRow_MSA(const uint8* src_argb,
uint8* dst_argb,
@@ -1442,7 +1427,6 @@ void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) {
}
}
-#ifndef DISABLE_CLANG_MSA
void ARGBSepiaRow_MSA(uint8* dst_argb, int width) {
int x;
v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5;
@@ -1483,7 +1467,6 @@ void ARGBSepiaRow_MSA(uint8* dst_argb, int width) {
dst_argb += 32;
}
}
-#endif
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
uint8* dst_argb,
@@ -1514,7 +1497,6 @@ void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
}
}
-#ifndef DISABLE_CLANG_MSA
void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555,
uint8* dst_argb,
int width) {
@@ -2372,7 +2354,6 @@ void SobelRow_MSA(const uint8* src_sobelx,
dst_argb += 64;
}
}
-#endif
void SobelToPlaneRow_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
@@ -2395,7 +2376,6 @@ void SobelToPlaneRow_MSA(const uint8* src_sobelx,
}
}
-#ifndef DISABLE_CLANG_MSA
void SobelXYRow_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
@@ -2870,7 +2850,6 @@ void UYVYToARGBRow_MSA(const uint8* src_uyvy,
rgb_buf += 32;
}
}
-#endif
void InterpolateRow_MSA(uint8* dst_ptr,
const uint8* src_ptr,
@@ -2936,7 +2915,6 @@ void InterpolateRow_MSA(uint8* dst_ptr,
}
}
-#ifndef DISABLE_CLANG_MSA
void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int width) {
int x;
v4i32 dst0 = __builtin_msa_fill_w(v32);
@@ -2946,7 +2924,6 @@ void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int width) {
dst_argb += 16;
}
}
-#endif
void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width) {
int x;
@@ -3010,7 +2987,6 @@ void ARGBExtractAlphaRow_MSA(const uint8* src_argb, uint8* dst_a, int width) {
}
}
-#ifndef DISABLE_CLANG_MSA
void ARGBBlendRow_MSA(const uint8* src_argb0,
const uint8* src_argb1,
uint8* dst_argb,
@@ -3290,7 +3266,6 @@ void ARGBColorMatrixRow_MSA(const uint8* src_argb,
dst_argb += 32;
}
}
-#endif
void SplitUVRow_MSA(const uint8* src_uv,
uint8* dst_u,
@@ -3316,7 +3291,6 @@ void SplitUVRow_MSA(const uint8* src_uv,
}
}
-#ifndef DISABLE_CLANG_MSA
void SetRow_MSA(uint8* dst, uint8 v8, int width) {
int x;
v16u8 dst0 = (v16u8)__msa_fill_b(v8);
@@ -3326,7 +3300,6 @@ void SetRow_MSA(uint8* dst, uint8 v8, int width) {
dst += 16;
}
}
-#endif
void MirrorUVRow_MSA(const uint8* src_uv,
uint8* dst_u,
@@ -3357,7 +3330,6 @@ void MirrorUVRow_MSA(const uint8* src_uv,
}
}
-#ifndef DISABLE_CLANG_MSA
void SobelXRow_MSA(const uint8* src_y0,
const uint8* src_y1,
const uint8* src_y2,
@@ -3456,7 +3428,6 @@ void SobelYRow_MSA(const uint8* src_y0,
dst_sobely += 16;
}
}
-#endif
void HalfFloatRow_MSA(const uint16* src, uint16* dst, float scale, int width) {
int i;
diff --git a/chromium/third_party/libyuv/source/row_neon64.cc b/chromium/third_party/libyuv/source/row_neon64.cc
index de17f8b7314..5616d8a5b5f 100644
--- a/chromium/third_party/libyuv/source/row_neon64.cc
+++ b/chromium/third_party/libyuv/source/row_neon64.cc
@@ -628,19 +628,19 @@ void MergeRGBRow_NEON(const uint8* src_r,
);
}
-// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15.
+// Copy multiple of 32.
void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
asm volatile(
"1: \n"
- "ld1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 32
+ "ldp q0, q1, [%0], #32 \n"
"subs %w2, %w2, #32 \n" // 32 processed per loop
- "st1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 32
+ "stp q0, q1, [%1], #32 \n"
"b.gt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2 // Output registers
- : // Input registers
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(count) // %2 // Output registers
+ : // Input registers
+ : "cc", "memory", "v0", "v1" // Clobber List
);
}
@@ -2646,11 +2646,11 @@ float ScaleMaxSamples_NEON(const float* src,
"b.gt 1b \n"
"fmax v5.4s, v5.4s, v6.4s \n" // max
"fmaxv %s3, v5.4s \n" // signed max acculator
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width), // %2
- "=w"(fmax) // %3
- : "w"(scale) // %4
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width), // %2
+ "=w"(fmax) // %3
+ : "w"(scale) // %4
: "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
return fmax;
}
@@ -2676,11 +2676,11 @@ float ScaleSumSamples_NEON(const float* src,
"faddp v5.4s, v5.4s, v6.4s \n"
"faddp v5.4s, v5.4s, v5.4s \n"
"faddp %3.4s, v5.4s, v5.4s \n" // sum
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width), // %2
- "=w"(fsum) // %3
- : "w"(scale) // %4
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width), // %2
+ "=w"(fsum) // %3
+ : "w"(scale) // %4
: "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
return fsum;
}
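
The rewritten CopyRow_NEON above moves two 16-byte q registers per iteration with a single ldp/stp pair, i.e. 32 bytes per loop to match the `subs %w2, %w2, #32` decrement, which is also why the clobber list shrinks to v0/v1. A minimal scalar sketch of the same 32-bytes-per-iteration contract (illustration only, not libyuv's CopyRow_C; count is assumed to be a multiple of 32):

#include <cstdint>
#include <cstring>

// Copies count bytes in 32-byte chunks, mirroring one ldp/stp pair per loop.
// count is assumed to be a multiple of 32, as in the NEON kernel's contract.
static void CopyRow32(const uint8_t* src, uint8_t* dst, int count) {
  while (count > 0) {
    std::memcpy(dst, src, 32);  // the NEON path does this with ldp/stp of q0,q1
    src += 32;
    dst += 32;
    count -= 32;
  }
}

int main() {
  uint8_t src[64] = {1, 2, 3};
  uint8_t dst[64] = {0};
  CopyRow32(src, dst, 64);
  return dst[2] == 3 ? 0 : 1;  // trivial self-check
}
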
diff --git a/chromium/third_party/libyuv/source/scale.cc b/chromium/third_party/libyuv/source/scale.cc
index 13e242ba030..9104acb95fc 100644
--- a/chromium/third_party/libyuv/source/scale.cc
+++ b/chromium/third_party/libyuv/source/scale.cc
@@ -822,11 +822,12 @@ static void ScaleAddCols2_16_C(int dst_width,
static void ScaleAddCols0_C(int dst_width,
int boxheight,
int x,
- int,
+ int dx,
const uint16* src_ptr,
uint8* dst_ptr) {
int scaleval = 65536 / boxheight;
int i;
+ (void)dx;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = src_ptr[i] * scaleval >> 16;
@@ -1699,7 +1700,7 @@ void ScalePlane_16(const uint16* src,
CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
return;
}
- if (dst_width == src_width) {
+ if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height);
// Arbitrary scale vertically, but unscaled vertically.
ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
@@ -1728,7 +1729,7 @@ void ScalePlane_16(const uint16* src,
return;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
- filtering != kFilterBilinear) {
+ (filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4
ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
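
The two condition changes above keep kFilterBox out of the vertical-only fast path and restrict the 1/4 down-scale specialization to kFilterBox and kFilterNone. The `dy` step fed to ScalePlaneVertical_16 is 16.16 fixed point; assuming the usual FixedDiv arithmetic of ((num << 16) / div), a quick worked check of the step value looks like:

#include <cstdint>
#include <cstdio>

// Assumed re-statement of a 16.16 fixed-point divide, for illustration only
// (not the library's own FixedDiv).
static int FixedDiv16(int num, int div) {
  return static_cast<int>((static_cast<int64_t>(num) << 16) / div);
}

int main() {
  int dy = FixedDiv16(720, 480);  // e.g. scale 720 source rows down to 480
  std::printf("dy = %d -> %.3f source rows per output row\n", dy, dy / 65536.0);
  // Prints: dy = 98304 -> 1.500 source rows per output row
  return 0;
}
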
diff --git a/chromium/third_party/libyuv/source/scale_msa.cc b/chromium/third_party/libyuv/source/scale_msa.cc
index c246e5e8ec9..df1f482be6d 100644
--- a/chromium/third_party/libyuv/source/scale_msa.cc
+++ b/chromium/third_party/libyuv/source/scale_msa.cc
@@ -127,7 +127,6 @@ void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
}
}
-#ifndef DISABLE_CLANG_MSA
void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
@@ -182,7 +181,6 @@ void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb,
dst_argb += 16;
}
}
-#endif
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
@@ -385,7 +383,6 @@ void ScaleRowDown38_MSA(const uint8_t* src_ptr,
}
}
-#ifndef DISABLE_CLANG_MSA
void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@@ -535,7 +532,6 @@ void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr,
dst_ptr += 12;
}
}
-#endif
void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
int x;
@@ -557,7 +553,6 @@ void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
}
}
-#ifndef DISABLE_CLANG_MSA
void ScaleFilterCols_MSA(uint8* dst_ptr,
const uint8* src_ptr,
int dst_width,
@@ -634,7 +629,6 @@ void ScaleFilterCols_MSA(uint8* dst_ptr,
dst_ptr += 16;
}
}
-#endif
void ScaleARGBCols_MSA(uint8* dst_argb,
const uint8* src_argb,
@@ -663,7 +657,6 @@ void ScaleARGBCols_MSA(uint8* dst_argb,
}
}
-#ifndef DISABLE_CLANG_MSA
void ScaleARGBFilterCols_MSA(uint8* dst_argb,
const uint8* src_argb,
int dst_width,
@@ -728,7 +721,6 @@ void ScaleARGBFilterCols_MSA(uint8* dst_argb,
dst_argb += 32;
}
}
-#endif
void ScaleRowDown34_MSA(const uint8* src_ptr,
ptrdiff_t src_stride,
diff --git a/chromium/third_party/libyuv/source/scale_win.cc b/chromium/third_party/libyuv/source/scale_win.cc
index 3e93312832a..b5fd6638262 100644
--- a/chromium/third_party/libyuv/source/scale_win.cc
+++ b/chromium/third_party/libyuv/source/scale_win.cc
@@ -17,7 +17,7 @@ extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// Offsets for source bytes 0 to 9
static uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9,
diff --git a/chromium/third_party/libyuv/tools_libyuv/OWNERS b/chromium/third_party/libyuv/tools_libyuv/OWNERS
index aca046d45e3..2cb971d2b72 100644
--- a/chromium/third_party/libyuv/tools_libyuv/OWNERS
+++ b/chromium/third_party/libyuv/tools_libyuv/OWNERS
@@ -1 +1 @@
-kjellander@chromium.org
+phoglund@chromium.org
diff --git a/chromium/third_party/libyuv/tools_libyuv/msan/OWNERS b/chromium/third_party/libyuv/tools_libyuv/msan/OWNERS
index 60351e7ea2a..0a919805c2c 100644
--- a/chromium/third_party/libyuv/tools_libyuv/msan/OWNERS
+++ b/chromium/third_party/libyuv/tools_libyuv/msan/OWNERS
@@ -1,3 +1,3 @@
pbos@chromium.org
-kjellander@chromium.org
+phoglund@chromium.org
diff --git a/chromium/third_party/libyuv/tools_libyuv/ubsan/OWNERS b/chromium/third_party/libyuv/tools_libyuv/ubsan/OWNERS
index b608519abf6..da77b4ef23f 100644
--- a/chromium/third_party/libyuv/tools_libyuv/ubsan/OWNERS
+++ b/chromium/third_party/libyuv/tools_libyuv/ubsan/OWNERS
@@ -1,4 +1,4 @@
pbos@webrtc.org
-kjellander@webrtc.org
+phoglund@webrtc.org
fbarchard@chromium.org
diff --git a/chromium/third_party/libyuv/unit_test/color_test.cc b/chromium/third_party/libyuv/unit_test/color_test.cc
index 0aa7a54ac01..30b6411283f 100644
--- a/chromium/third_party/libyuv/unit_test/color_test.cc
+++ b/chromium/third_party/libyuv/unit_test/color_test.cc
@@ -471,21 +471,22 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
printf("\n");
}
+// Stepping by 5 on the inner loop still covers 0 to 255 inclusive.
+// Set to 1 for better coverage. 3, 5 or 17 for faster testing.
+#define FASTSTEP 5
TEST_F(LibYUVColorTest, TestFullYUV) {
- int rh[256] =
- {
- 0,
- },
- gh[256] =
- {
- 0,
- },
- bh[256] = {
- 0,
- };
+ int rh[256] = {
+ 0,
+ };
+ int gh[256] = {
+ 0,
+ };
+ int bh[256] = {
+ 0,
+ };
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
- for (int y2 = 0; y2 < 256; ++y2) {
+ for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVToRGBReference(y, u, v, &r0, &g0, &b0);
@@ -503,20 +504,18 @@ TEST_F(LibYUVColorTest, TestFullYUV) {
}
TEST_F(LibYUVColorTest, TestFullYUVJ) {
- int rh[256] =
- {
- 0,
- },
- gh[256] =
- {
- 0,
- },
- bh[256] = {
- 0,
- };
+ int rh[256] = {
+ 0,
+ };
+ int gh[256] = {
+ 0,
+ };
+ int bh[256] = {
+ 0,
+ };
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
- for (int y2 = 0; y2 < 256; ++y2) {
+ for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVJToRGBReference(y, u, v, &r0, &g0, &b0);
@@ -532,6 +531,7 @@ TEST_F(LibYUVColorTest, TestFullYUVJ) {
}
PrintHistogram(rh, gh, bh);
}
+#undef FASTSTEP
TEST_F(LibYUVColorTest, TestGreyYUVJ) {
int r0, g0, b0, r1, g1, b1, r2, g2, b2;
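
FASTSTEP = 5 still reaches 255 on the inner loop because 255 is a multiple of 5, so the histogram keeps samples at both ends of the Y range. A standalone check of the values the loop visits (a sketch, independent of the test harness):

#include <cstdio>

int main() {
  int count = 0, last = -1;
  for (int y2 = 0; y2 < 256; y2 += 5) {  // mirrors the FASTSTEP loop above
    last = y2;
    ++count;
  }
  std::printf("%d values visited, last = %d\n", count, last);
  // Prints: 52 values visited, last = 255
  return 0;
}
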
diff --git a/chromium/third_party/libyuv/unit_test/compare_test.cc b/chromium/third_party/libyuv/unit_test/compare_test.cc
index 149a9a13ae6..ff39b2b0f60 100644
--- a/chromium/third_party/libyuv/unit_test/compare_test.cc
+++ b/chromium/third_party/libyuv/unit_test/compare_test.cc
@@ -229,13 +229,34 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
for (int i = 0; i < count; ++i) {
#if defined(HAS_HAMMINGDISTANCE_NEON)
h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth);
-#elif defined(HAS_HAMMINGDISTANCE_X86)
- h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
+#elif defined(HAS_HAMMINGDISTANCE_AVX2)
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ if (has_avx2) {
+ h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth);
+ } else {
+ int has_sse42 = TestCpuFlag(kCpuHasSSE42);
+ if (has_sse42) {
+ h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth);
+ } else {
+ int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ if (has_ssse3) {
+ h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth);
+ } else {
+ h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
+ }
+ }
+ }
+#elif defined(HAS_HAMMINGDISTANCE_SSE42)
+ int has_sse42 = TestCpuFlag(kCpuHasSSE42);
+ if (has_sse42) {
+ h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth);
+ } else {
+ h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
+ }
#else
h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
#endif
}
-
EXPECT_EQ(h0, h1);
free_aligned_buffer_page_end(src_a);
@@ -305,6 +326,99 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) {
free_aligned_buffer_page_end(src_b);
}
+// Tests that the low level functions match the reference C for a specified
+// size. The opt implementations have size limitations.
+// For NEON the counters are 16 bit, so the shorts overflow after 65536 bytes.
+// So the maximum is one 32-byte loop iteration short of that.
+#if defined(HAS_HAMMINGDISTANCE_NEON)
+static const int kMaxOptCount = 65536 - 32; // 65504
+#else
+static const int kMaxOptCount = (1 << (32 - 3)) - 64; // 536870848
+#endif
+
+TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
+ uint32 h1 = 0;
+ const int kMaxWidth = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_a, kMaxWidth);
+ align_buffer_page_end(src_b, kMaxWidth);
+ memset(src_a, 255u, kMaxWidth);
+ memset(src_b, 0u, kMaxWidth);
+
+ uint64 h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth);
+ EXPECT_EQ(kMaxWidth * 8ULL, h0);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+#if defined(HAS_HAMMINGDISTANCE_NEON)
+ h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth);
+#elif defined(HAS_HAMMINGDISTANCE_AVX2)
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ if (has_avx2) {
+ h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth);
+ } else {
+ int has_sse42 = TestCpuFlag(kCpuHasSSE42);
+ if (has_sse42) {
+ h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth);
+ } else {
+ int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ if (has_ssse3) {
+ h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth);
+ } else {
+ h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
+ }
+ }
+ }
+#elif defined(HAS_HAMMINGDISTANCE_SSE42)
+ int has_sse42 = TestCpuFlag(kCpuHasSSE42);
+ if (has_sse42) {
+ h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth);
+ } else {
+ h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
+ }
+#else
+ h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
+#endif
+ }
+
+  // A large count may cause the low level counters to overflow, so the
+  // result cannot be expected to be correct.
+ // TODO(fbarchard): Consider expecting the low 16 bits to match.
+ if (kMaxWidth <= kMaxOptCount) {
+ EXPECT_EQ(kMaxWidth * 8U, h1);
+ } else {
+ if (kMaxWidth * 8ULL != static_cast<uint64>(h1)) {
+ printf(
+ "warning - HammingDistance_Opt %u does not match %llu "
+ "but length of %u is longer than guaranteed.\n",
+ h1, kMaxWidth * 8ULL, kMaxWidth);
+ } else {
+ printf(
+ "warning - HammingDistance_Opt %u matches but length of %u "
+ "is longer than guaranteed.\n",
+ h1, kMaxWidth);
+ }
+ }
+
+ free_aligned_buffer_page_end(src_a);
+ free_aligned_buffer_page_end(src_b);
+}
+
+TEST_F(LibYUVCompareTest, TestHammingDistance) {
+ align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_);
+ memset(src_a, 255u, benchmark_width_ * benchmark_height_);
+ memset(src_b, 0, benchmark_width_ * benchmark_height_);
+
+ uint64 h1 = 0;
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ h1 = ComputeHammingDistance(src_a, src_b,
+ benchmark_width_ * benchmark_height_);
+ }
+ EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h1);
+
+ free_aligned_buffer_page_end(src_a);
+ free_aligned_buffer_page_end(src_b);
+}
+
TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) {
const int kMaxWidth = 4096 * 3;
align_buffer_page_end(src_a, kMaxWidth);
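
The new Hamming distance tests above fill one buffer with 0xFF and the other with 0x00, so every bit differs and the expected distance is 8 bits per byte; the NEON kernel's 16-bit lane counters are also why kMaxOptCount is capped one 32-byte iteration short of 65536. A self-contained reference computation of the same quantity (a sketch using the GCC/Clang popcount builtin, not the library's ComputeHammingDistance):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Bitwise Hamming distance of two byte buffers: popcount of (a XOR b).
static uint64_t HammingDistanceRef(const uint8_t* a, const uint8_t* b, int n) {
  uint64_t diff = 0;
  for (int i = 0; i < n; ++i) {
    diff += __builtin_popcount(a[i] ^ b[i]);
  }
  return diff;
}

int main() {
  const int kCount = 1024;
  uint8_t a[1024], b[1024];
  std::memset(a, 255, kCount);
  std::memset(b, 0, kCount);
  uint64_t h = HammingDistanceRef(a, b, kCount);
  std::printf("%llu (expected %d)\n", static_cast<unsigned long long>(h),
              kCount * 8);  // every bit differs: 8 bits per byte
  return 0;
}
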
diff --git a/chromium/third_party/libyuv/unit_test/convert_test.cc b/chromium/third_party/libyuv/unit_test/convert_test.cc
index deeb30e4985..56b6364e5eb 100644
--- a/chromium/third_party/libyuv/unit_test/convert_test.cc
+++ b/chromium/third_party/libyuv/unit_test/convert_test.cc
@@ -572,6 +572,8 @@ TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1, 2, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4)
+TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4)
+TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1, 9, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1, 17, ARGB, 4)
@@ -1798,6 +1800,11 @@ TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
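
The added H420 rows exercise the BT.709 ("HD") variants of the I420 conversions introduced by the convert_from.h change. The planar entry points follow the same argument pattern as their I420 counterparts; a minimal call sketch, with buffer sizes chosen for a 2x2 example and H420ToARGB assumed to have the usual libyuv planar signature:

#include <cstdint>
#include "libyuv/convert_argb.h"

int main() {
  // 2x2 image: 4 luma samples, 1 chroma sample per plane (2x2 subsampling).
  uint8_t src_y[4] = {16, 64, 128, 235};
  uint8_t src_u[1] = {128};
  uint8_t src_v[1] = {128};
  uint8_t dst_argb[2 * 2 * 4] = {0};
  // Same layout as I420ToARGB, but with the BT.709 matrix applied.
  int ret = libyuv::H420ToARGB(src_y, 2,         // Y plane, stride 2
                               src_u, 1,         // U plane, stride 1
                               src_v, 1,         // V plane, stride 1
                               dst_argb, 2 * 4,  // ARGB output, stride 8
                               2, 2);            // width, height
  return ret;  // 0 on success
}
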
diff --git a/chromium/third_party/libyuv/unit_test/cpu_test.cc b/chromium/third_party/libyuv/unit_test/cpu_test.cc
index c0b8910a115..4e694f55ce5 100644
--- a/chromium/third_party/libyuv/unit_test/cpu_test.cc
+++ b/chromium/third_party/libyuv/unit_test/cpu_test.cc
@@ -21,38 +21,55 @@ namespace libyuv {
TEST_F(LibYUVBaseTest, TestCpuHas) {
int cpu_flags = TestCpuFlag(-1);
printf("Cpu Flags %x\n", cpu_flags);
+#if defined(__arm__) || defined(__aarch64__)
int has_arm = TestCpuFlag(kCpuHasARM);
printf("Has ARM %x\n", has_arm);
int has_neon = TestCpuFlag(kCpuHasNEON);
printf("Has NEON %x\n", has_neon);
+#endif
int has_x86 = TestCpuFlag(kCpuHasX86);
- printf("Has X86 %x\n", has_x86);
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
- printf("Has SSE2 %x\n", has_sse2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
- printf("Has SSSE3 %x\n", has_ssse3);
int has_sse41 = TestCpuFlag(kCpuHasSSE41);
- printf("Has SSE4.1 %x\n", has_sse41);
int has_sse42 = TestCpuFlag(kCpuHasSSE42);
- printf("Has SSE4.2 %x\n", has_sse42);
int has_avx = TestCpuFlag(kCpuHasAVX);
- printf("Has AVX %x\n", has_avx);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
- printf("Has AVX2 %x\n", has_avx2);
int has_erms = TestCpuFlag(kCpuHasERMS);
- printf("Has ERMS %x\n", has_erms);
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
- printf("Has FMA3 %x\n", has_fma3);
- int has_avx3 = TestCpuFlag(kCpuHasAVX3);
- printf("Has AVX3 %x\n", has_avx3);
int has_f16c = TestCpuFlag(kCpuHasF16C);
+ int has_gfni = TestCpuFlag(kCpuHasGFNI);
+ int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
+ int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
+ int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
+ int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
+ int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
+ int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
+ printf("Has X86 %x\n", has_x86);
+ printf("Has SSE2 %x\n", has_sse2);
+ printf("Has SSSE3 %x\n", has_ssse3);
+ printf("Has SSE4.1 %x\n", has_sse41);
+ printf("Has SSE4.2 %x\n", has_sse42);
+ printf("Has AVX %x\n", has_avx);
+ printf("Has AVX2 %x\n", has_avx2);
+ printf("Has ERMS %x\n", has_erms);
+ printf("Has FMA3 %x\n", has_fma3);
printf("Has F16C %x\n", has_f16c);
+ printf("Has GFNI %x\n", has_gfni);
+ printf("Has AVX512BW %x\n", has_avx512bw);
+ printf("Has AVX512VL %x\n", has_avx512vl);
+ printf("Has AVX512VBMI %x\n", has_avx512vbmi);
+ printf("Has AVX512VBMI2 %x\n", has_avx512vbmi2);
+ printf("Has AVX512VBITALG %x\n", has_avx512vbitalg);
+ printf("Has AVX512VPOPCNTDQ %x\n", has_avx512vpopcntdq);
+
+#if defined(__mips__)
int has_mips = TestCpuFlag(kCpuHasMIPS);
printf("Has MIPS %x\n", has_mips);
int has_dspr2 = TestCpuFlag(kCpuHasDSPR2);
printf("Has DSPR2 %x\n", has_dspr2);
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %x\n", has_msa);
+#endif
}
TEST_F(LibYUVBaseTest, TestCpuCompilerEnabled) {
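
TestCpuFlag returns a nonzero value when the queried feature bit is set, which is why the test above prints each flag with %x and why the new benchmark code queries a flag once before picking a kernel. The dispatch idiom, reduced to a sketch (the row-function names below are placeholders, not libyuv symbols):

#include <cstdint>
#include "libyuv/cpu_id.h"

// Generic row-kernel signature, used only for this illustration.
typedef void (*RowFunc)(const uint8_t* src, uint8_t* dst, int width);

static void CopyRow_Plain(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i) dst[i] = src[i];
}

// Query the flag once, then select the widest implementation available.
// Real code would pass the C and AVX2 kernels; CopyRow_Plain stands in here.
static RowFunc ChooseRowFunc(RowFunc c_version, RowFunc avx2_version) {
  return libyuv::TestCpuFlag(libyuv::kCpuHasAVX2) ? avx2_version : c_version;
}

int main() {
  uint8_t src[16] = {1};
  uint8_t dst[16] = {0};
  RowFunc f = ChooseRowFunc(CopyRow_Plain, CopyRow_Plain);
  f(src, dst, 16);
  return dst[0] == 1 ? 0 : 1;
}
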
diff --git a/chromium/third_party/libyuv/unit_test/planar_test.cc b/chromium/third_party/libyuv/unit_test/planar_test.cc
index 32bcb871065..f9e6f8abb2f 100644
--- a/chromium/third_party/libyuv/unit_test/planar_test.cc
+++ b/chromium/third_party/libyuv/unit_test/planar_test.cc
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
#include <stdlib.h>
#include <time.h>
@@ -2616,6 +2617,88 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
free_aligned_buffer_page_end(dst_pixels_c);
}
+// TODO(fbarchard): improve test for platforms and cpu detect
+#ifdef HAS_MERGEUVROW_16_AVX2
+TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_u, kPixels * 2);
+ align_buffer_page_end(src_pixels_v, kPixels * 2);
+ align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
+ align_buffer_page_end(dst_pixels_uv_c, kPixels * 2 * 2);
+
+ MemRandomize(src_pixels_u, kPixels * 2);
+ MemRandomize(src_pixels_v, kPixels * 2);
+ memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2);
+ memset(dst_pixels_uv_c, 1, kPixels * 2 * 2);
+
+ MergeUVRow_16_C(reinterpret_cast<const uint16*>(src_pixels_u),
+ reinterpret_cast<const uint16*>(src_pixels_v),
+ reinterpret_cast<uint16*>(dst_pixels_uv_c), 64, kPixels);
+
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ if (has_avx2) {
+ MergeUVRow_16_AVX2(reinterpret_cast<const uint16*>(src_pixels_u),
+ reinterpret_cast<const uint16*>(src_pixels_v),
+ reinterpret_cast<uint16*>(dst_pixels_uv_opt), 64,
+ kPixels);
+ } else {
+ MergeUVRow_16_C(reinterpret_cast<const uint16*>(src_pixels_u),
+ reinterpret_cast<const uint16*>(src_pixels_v),
+ reinterpret_cast<uint16*>(dst_pixels_uv_opt), 64,
+ kPixels);
+ }
+ }
+
+ for (int i = 0; i < kPixels * 2 * 2; ++i) {
+ EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_u);
+ free_aligned_buffer_page_end(src_pixels_v);
+ free_aligned_buffer_page_end(dst_pixels_uv_opt);
+ free_aligned_buffer_page_end(dst_pixels_uv_c);
+}
+#endif
+
+// TODO(fbarchard): improve test for platforms and cpu detect
+#ifdef HAS_MULTIPLYROW_16_AVX2
+TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels * 2);
+ memset(dst_pixels_y_c, 1, kPixels * 2);
+
+ MultiplyRow_16_C(reinterpret_cast<const uint16*>(src_pixels_y),
+ reinterpret_cast<uint16*>(dst_pixels_y_c), 64, kPixels);
+
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ if (has_avx2) {
+ MultiplyRow_16_AVX2(reinterpret_cast<const uint16*>(src_pixels_y),
+ reinterpret_cast<uint16*>(dst_pixels_y_opt), 64,
+ kPixels);
+ } else {
+ MultiplyRow_16_C(reinterpret_cast<const uint16*>(src_pixels_y),
+ reinterpret_cast<uint16*>(dst_pixels_y_opt), 64,
+ kPixels);
+ }
+ }
+
+ for (int i = 0; i < kPixels * 2; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+#endif
+
float TestScaleMaxSamples(int benchmark_width,
int benchmark_height,
int benchmark_iterations,
@@ -2623,44 +2706,44 @@ float TestScaleMaxSamples(int benchmark_width,
bool opt) {
int i, j;
float max_c, max_opt = 0.f;
- const int y_plane_size = benchmark_width * benchmark_height * 4;
-
- align_buffer_page_end(orig_y, y_plane_size * 3);
- uint8* dst_opt = orig_y + y_plane_size;
- uint8* dst_c = orig_y + y_plane_size * 2;
+ // NEON does multiple of 8, so round count up
+ const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
+ align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
+ uint8* dst_c = orig_y + kPixels * 4 + 16;
+ uint8* dst_opt = orig_y + kPixels * 4 * 2 + 32;
// Randomize works but may contain some denormals affecting performance.
- // MemRandomize(orig_y, y_plane_size);
- for (i = 0; i < y_plane_size / 4; ++i) {
- (reinterpret_cast<float*>(orig_y))[i] = (i - y_plane_size / 8) * 3.1415f;
+ // MemRandomize(orig_y, kPixels * 4);
+ // large values are problematic. audio is really -1 to 1.
+ for (i = 0; i < kPixels; ++i) {
+ (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
}
- memset(dst_c, 0, y_plane_size);
- memset(dst_opt, 1, y_plane_size);
+ memset(dst_c, 0, kPixels * 4);
+ memset(dst_opt, 1, kPixels * 4);
max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_c), scale,
- benchmark_width * benchmark_height);
+ reinterpret_cast<float*>(dst_c), scale, kPixels);
for (j = 0; j < benchmark_iterations; j++) {
if (opt) {
#ifdef HAS_SCALESUMSAMPLES_NEON
max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ kPixels);
#else
- max_opt = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ max_opt =
+ ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
+ reinterpret_cast<float*>(dst_opt), scale, kPixels);
#endif
} else {
- max_opt = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ max_opt =
+ ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
+ reinterpret_cast<float*>(dst_opt), scale, kPixels);
}
}
float max_diff = FAbs(max_opt - max_c);
- for (i = 0; i < y_plane_size / 4; ++i) {
+ for (i = 0; i < kPixels; ++i) {
float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
(reinterpret_cast<float*>(dst_opt)[i]));
if (abs_diff > max_diff) {
@@ -2691,44 +2774,55 @@ float TestScaleSumSamples(int benchmark_width,
bool opt) {
int i, j;
float sum_c, sum_opt = 0.f;
- const int y_plane_size = benchmark_width * benchmark_height * 4;
-
- align_buffer_page_end(orig_y, y_plane_size * 3);
- uint8* dst_opt = orig_y + y_plane_size;
- uint8* dst_c = orig_y + y_plane_size * 2;
+ // NEON does multiple of 8, so round count up
+ const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
+ align_buffer_page_end(orig_y, kPixels * 4 * 3);
+ uint8* dst_c = orig_y + kPixels * 4;
+ uint8* dst_opt = orig_y + kPixels * 4 * 2;
// Randomize works but may contain some denormals affecting performance.
- // MemRandomize(orig_y, y_plane_size);
- for (i = 0; i < y_plane_size / 4; ++i) {
- (reinterpret_cast<float*>(orig_y))[i] = (i - y_plane_size / 8) * 3.1415f;
+ // MemRandomize(orig_y, kPixels * 4);
+ // large values are problematic. audio is really -1 to 1.
+ for (i = 0; i < kPixels; ++i) {
+ (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
}
- memset(dst_c, 0, y_plane_size);
- memset(dst_opt, 1, y_plane_size);
+ memset(dst_c, 0, kPixels * 4);
+ memset(dst_opt, 1, kPixels * 4);
sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_c), scale,
- benchmark_width * benchmark_height);
+ reinterpret_cast<float*>(dst_c), scale, kPixels);
for (j = 0; j < benchmark_iterations; j++) {
if (opt) {
#ifdef HAS_SCALESUMSAMPLES_NEON
sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ kPixels);
#else
- sum_opt = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ sum_opt =
+ ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
+ reinterpret_cast<float*>(dst_opt), scale, kPixels);
#endif
} else {
- sum_opt = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ sum_opt =
+ ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
+ reinterpret_cast<float*>(dst_opt), scale, kPixels);
}
}
- float max_diff = FAbs(sum_opt - sum_c);
- for (i = 0; i < y_plane_size / 4; ++i) {
+ float mse_opt = sum_opt / kPixels * 4;
+ float mse_c = sum_c / kPixels * 4;
+ float mse_error = FAbs(mse_opt - mse_c) / mse_c;
+
+  // If the float sum exceeds about 4 million, small adds are rounded off and
+  // the vectorized sum can differ from the scalar sum.
+ // Ignore the difference if the sum is large.
+ float max_diff = 0.f;
+ if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse
+ max_diff = mse_error;
+ }
+
+ for (i = 0; i < kPixels; ++i) {
float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
(reinterpret_cast<float*>(dst_opt)[i]));
if (abs_diff > max_diff) {
@@ -2758,45 +2852,41 @@ float TestScaleSamples(int benchmark_width,
float scale,
bool opt) {
int i, j;
- const int y_plane_size = benchmark_width * benchmark_height * 4;
-
- align_buffer_page_end(orig_y, y_plane_size * 3);
- uint8* dst_opt = orig_y + y_plane_size;
- uint8* dst_c = orig_y + y_plane_size * 2;
+ // NEON does multiple of 8, so round count up
+ const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
+ align_buffer_page_end(orig_y, kPixels * 4 * 3);
+ uint8* dst_c = orig_y + kPixels * 4;
+ uint8* dst_opt = orig_y + kPixels * 4 * 2;
// Randomize works but may contain some denormals affecting performance.
- // MemRandomize(orig_y, y_plane_size);
- for (i = 0; i < y_plane_size / 4; ++i) {
- (reinterpret_cast<float*>(orig_y))[i] = (i - y_plane_size / 8) * 3.1415f;
+ // MemRandomize(orig_y, kPixels * 4);
+ // large values are problematic. audio is really -1 to 1.
+ for (i = 0; i < kPixels; ++i) {
+ (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
}
-
- memset(dst_c, 0, y_plane_size);
- memset(dst_opt, 1, y_plane_size);
+ memset(dst_c, 0, kPixels * 4);
+ memset(dst_opt, 1, kPixels * 4);
ScaleSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_c), scale,
- benchmark_width * benchmark_height);
+ reinterpret_cast<float*>(dst_c), scale, kPixels);
for (j = 0; j < benchmark_iterations; j++) {
if (opt) {
#ifdef HAS_SCALESUMSAMPLES_NEON
ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ reinterpret_cast<float*>(dst_opt), scale, kPixels);
#else
ScaleSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ reinterpret_cast<float*>(dst_opt), scale, kPixels);
#endif
} else {
ScaleSamples_C(reinterpret_cast<float*>(orig_y),
- reinterpret_cast<float*>(dst_opt), scale,
- benchmark_width * benchmark_height);
+ reinterpret_cast<float*>(dst_opt), scale, kPixels);
}
}
float max_diff = 0.f;
- for (i = 0; i < y_plane_size / 4; ++i) {
+ for (i = 0; i < kPixels; ++i) {
float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
(reinterpret_cast<float*>(dst_opt)[i]));
if (abs_diff > max_diff) {
@@ -2820,6 +2910,66 @@ TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
EXPECT_EQ(0, diff);
}
+float TestCopySamples(int benchmark_width,
+ int benchmark_height,
+ int benchmark_iterations,
+ bool opt) {
+ int i, j;
+ // NEON does multiple of 16 floats, so round count up
+ const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
+ align_buffer_page_end(orig_y, kPixels * 4 * 3);
+ uint8* dst_c = orig_y + kPixels * 4;
+ uint8* dst_opt = orig_y + kPixels * 4 * 2;
+
+ // Randomize works but may contain some denormals affecting performance.
+ // MemRandomize(orig_y, kPixels * 4);
+ // large values are problematic. audio is really -1 to 1.
+ for (i = 0; i < kPixels; ++i) {
+ (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
+ }
+ memset(dst_c, 0, kPixels * 4);
+ memset(dst_opt, 1, kPixels * 4);
+
+ memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
+ kPixels * 4);
+
+ for (j = 0; j < benchmark_iterations; j++) {
+ if (opt) {
+#ifdef HAS_COPYROW_NEON
+ CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
+#else
+ CopyRow_C(orig_y, dst_opt, kPixels * 4);
+#endif
+ } else {
+ CopyRow_C(orig_y, dst_opt, kPixels * 4);
+ }
+ }
+
+ float max_diff = 0.f;
+ for (i = 0; i < kPixels; ++i) {
+ float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
+ (reinterpret_cast<float*>(dst_opt)[i]));
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ return max_diff;
+}
+
+TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
+ float diff = TestCopySamples(benchmark_width_, benchmark_height_,
+ benchmark_iterations_, false);
+ EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
+ float diff = TestCopySamples(benchmark_width_, benchmark_height_,
+ benchmark_iterations_, true);
+ EXPECT_EQ(0, diff);
+}
+
extern "C" void GaussRow_NEON(const uint32* src, uint16* dst, int width);
extern "C" void GaussRow_C(const uint32* src, uint16* dst, int width);
diff --git a/chromium/third_party/libyuv/unit_test/scale_test.cc b/chromium/third_party/libyuv/unit_test/scale_test.cc
index 8046de7d4bf..c39211a161b 100644
--- a/chromium/third_party/libyuv/unit_test/scale_test.cc
+++ b/chromium/third_party/libyuv/unit_test/scale_test.cc
@@ -35,19 +35,19 @@ static int TestFilter(int src_width,
}
int i, j;
- const int b = 0; // 128 to test for padding/stride.
int src_width_uv = (Abs(src_width) + 1) >> 1;
int src_height_uv = (Abs(src_height) + 1) >> 1;
- int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
- int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
+ int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64 src_uv_plane_size = (src_width_uv) * (src_height_uv);
- int src_stride_y = b * 2 + Abs(src_width);
- int src_stride_uv = b * 2 + src_width_uv;
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
- align_buffer_page_end(src_y, src_y_plane_size)
- align_buffer_page_end(src_u, src_uv_plane_size) align_buffer_page_end(
- src_v, src_uv_plane_size) if (!src_y || !src_u || !src_v) {
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ if (!src_y || !src_u || !src_v) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
@@ -58,60 +58,51 @@ static int TestFilter(int src_width,
int dst_width_uv = (dst_width + 1) >> 1;
int dst_height_uv = (dst_height + 1) >> 1;
- int64 dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
- int64 dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
-
- int dst_stride_y = b * 2 + dst_width;
- int dst_stride_uv = b * 2 + dst_width_uv;
-
- align_buffer_page_end(dst_y_c, dst_y_plane_size)
- align_buffer_page_end(dst_u_c, dst_uv_plane_size)
- align_buffer_page_end(dst_v_c, dst_uv_plane_size)
- align_buffer_page_end(dst_y_opt, dst_y_plane_size)
- align_buffer_page_end(dst_u_opt, dst_uv_plane_size)
- align_buffer_page_end(
- dst_v_opt,
- dst_uv_plane_size) if (!dst_y_c || !dst_u_c ||
- !dst_v_c || !dst_y_opt ||
- !dst_u_opt || !dst_v_opt) {
+ int64 dst_y_plane_size = (dst_width) * (dst_height);
+ int64 dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
+
+ align_buffer_page_end(dst_y_c, dst_y_plane_size);
+ align_buffer_page_end(dst_u_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_opt, dst_y_plane_size);
+ align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
+ if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
+ !dst_v_opt) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
double c_time = get_time();
- I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
- src_u + (src_stride_uv * b) + b, src_stride_uv,
- src_v + (src_stride_uv * b) + b, src_stride_uv, src_width,
- src_height, dst_y_c + (dst_stride_y * b) + b, dst_stride_y,
- dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv,
- dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
- dst_height, f);
+ I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
+ dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
c_time = (get_time() - c_time);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
double opt_time = get_time();
for (i = 0; i < benchmark_iterations; ++i) {
- I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
- src_u + (src_stride_uv * b) + b, src_stride_uv,
- src_v + (src_stride_uv * b) + b, src_stride_uv, src_width,
- src_height, dst_y_opt + (dst_stride_y * b) + b, dst_stride_y,
- dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv,
- dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
- dst_height, f);
+ I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
+ dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
+ f);
}
opt_time = (get_time() - opt_time) / benchmark_iterations;
- // Report performance of C vs OPT
+ // Report performance of C vs OPT.
printf("filter %d - %8d us C - %8d us OPT\n", f,
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
// C version may be a little off from the optimized. Order of
// operations may introduce rounding somewhere. So do a difference
- // of the buffers and look to see that the max difference isn't
- // over 2.
+ // of the buffers and look to see that the max difference is not
+ // over 3.
int max_diff = 0;
- for (i = b; i < (dst_height + b); ++i) {
- for (j = b; j < (dst_width + b); ++j) {
+ for (i = 0; i < (dst_height); ++i) {
+ for (j = 0; j < (dst_width); ++j) {
int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
dst_y_opt[(i * dst_stride_y) + j]);
if (abs_diff > max_diff) {
@@ -120,8 +111,8 @@ static int TestFilter(int src_width,
}
}
- for (i = b; i < (dst_height_uv + b); ++i) {
- for (j = b; j < (dst_width_uv + b); ++j) {
+ for (i = 0; i < (dst_height_uv); ++i) {
+ for (j = 0; j < (dst_width_uv); ++j) {
int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
dst_u_opt[(i * dst_stride_uv) + j]);
if (abs_diff > max_diff) {
@@ -135,17 +126,17 @@ static int TestFilter(int src_width,
}
}
- free_aligned_buffer_page_end(dst_y_c) free_aligned_buffer_page_end(dst_u_c)
- free_aligned_buffer_page_end(dst_v_c)
- free_aligned_buffer_page_end(dst_y_opt)
- free_aligned_buffer_page_end(dst_u_opt)
- free_aligned_buffer_page_end(dst_v_opt)
-
- free_aligned_buffer_page_end(src_y)
- free_aligned_buffer_page_end(src_u)
- free_aligned_buffer_page_end(src_v)
-
- return max_diff;
+ free_aligned_buffer_page_end(dst_y_c);
+ free_aligned_buffer_page_end(dst_u_c);
+ free_aligned_buffer_page_end(dst_v_c);
+ free_aligned_buffer_page_end(dst_y_opt);
+ free_aligned_buffer_page_end(dst_u_opt);
+ free_aligned_buffer_page_end(dst_v_opt);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+
+ return max_diff;
}
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
@@ -155,28 +146,34 @@ static int TestFilter_16(int src_width,
int dst_width,
int dst_height,
FilterMode f,
- int benchmark_iterations) {
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
- int i, j;
- const int b = 0; // 128 to test for padding/stride.
+ int i;
int src_width_uv = (Abs(src_width) + 1) >> 1;
int src_height_uv = (Abs(src_height) + 1) >> 1;
- int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
- int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
+ int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64 src_uv_plane_size = (src_width_uv) * (src_height_uv);
- int src_stride_y = b * 2 + Abs(src_width);
- int src_stride_uv = b * 2 + src_width_uv;
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
- align_buffer_page_end(src_y, src_y_plane_size) align_buffer_page_end(
- src_u, src_uv_plane_size) align_buffer_page_end(src_v, src_uv_plane_size)
- align_buffer_page_end(src_y_16, src_y_plane_size * 2)
- align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
- align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
- uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+ align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
+ align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
+ if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
@@ -184,104 +181,84 @@ static int TestFilter_16(int src_width,
MemRandomize(src_u, src_uv_plane_size);
MemRandomize(src_v, src_uv_plane_size);
- for (i = b; i < src_height + b; ++i) {
- for (j = b; j < src_width + b; ++j) {
- p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j];
- }
+ for (i = 0; i < src_y_plane_size; ++i) {
+ p_src_y_16[i] = src_y[i];
}
-
- for (i = b; i < (src_height_uv + b); ++i) {
- for (j = b; j < (src_width_uv + b); ++j) {
- p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j];
- p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j];
- }
+ for (i = 0; i < src_uv_plane_size; ++i) {
+ p_src_u_16[i] = src_u[i];
+ p_src_v_16[i] = src_v[i];
}
int dst_width_uv = (dst_width + 1) >> 1;
int dst_height_uv = (dst_height + 1) >> 1;
- int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
- int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
+ int dst_y_plane_size = (dst_width) * (dst_height);
+ int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
- int dst_stride_y = b * 2 + dst_width;
- int dst_stride_uv = b * 2 + dst_width_uv;
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
- align_buffer_page_end(dst_y_8, dst_y_plane_size)
- align_buffer_page_end(dst_u_8, dst_uv_plane_size)
- align_buffer_page_end(dst_v_8, dst_uv_plane_size)
- align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
- align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
- align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
+ align_buffer_page_end(dst_y_8, dst_y_plane_size);
+ align_buffer_page_end(dst_u_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+ align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
+ align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
- uint16* p_dst_y_16 =
- reinterpret_cast<uint16*>(dst_y_16);
+ uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
- I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
- src_u + (src_stride_uv * b) + b, src_stride_uv,
- src_v + (src_stride_uv * b) + b, src_stride_uv, src_width,
- src_height, dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
- dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv,
- dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
- dst_height, f);
-
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
+ dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (i = 0; i < benchmark_iterations; ++i) {
- I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y,
- p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv,
- p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv, src_width,
- src_height, p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
- p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv,
- p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
- dst_height, f);
+ I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
+ p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
+ dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
+ dst_stride_uv, dst_width, dst_height, f);
}
- // Expect an exact match
+ // Expect an exact match.
int max_diff = 0;
- for (i = b; i < (dst_height + b); ++i) {
- for (j = b; j < (dst_width + b); ++j) {
- int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] -
- p_dst_y_16[(i * dst_stride_y) + j]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
+ for (i = 0; i < dst_y_plane_size; ++i) {
+ int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
}
}
-
- for (i = b; i < (dst_height_uv + b); ++i) {
- for (j = b; j < (dst_width_uv + b); ++j) {
- int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] -
- p_dst_u_16[(i * dst_stride_uv) + j]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
- abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] -
- p_dst_v_16[(i * dst_stride_uv) + j]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
}
}
- free_aligned_buffer_page_end(dst_y_8) free_aligned_buffer_page_end(dst_u_8)
- free_aligned_buffer_page_end(dst_v_8)
- free_aligned_buffer_page_end(dst_y_16)
- free_aligned_buffer_page_end(dst_u_16)
- free_aligned_buffer_page_end(dst_v_16)
-
- free_aligned_buffer_page_end(src_y)
- free_aligned_buffer_page_end(src_u)
- free_aligned_buffer_page_end(src_v)
- free_aligned_buffer_page_end(src_y_16)
- free_aligned_buffer_page_end(src_u_16)
- free_aligned_buffer_page_end(src_v_16)
-
- return max_diff;
+ free_aligned_buffer_page_end(dst_y_8);
+ free_aligned_buffer_page_end(dst_u_8);
+ free_aligned_buffer_page_end(dst_v_8);
+ free_aligned_buffer_page_end(dst_y_16);
+ free_aligned_buffer_page_end(dst_u_16);
+ free_aligned_buffer_page_end(dst_v_16);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+ free_aligned_buffer_page_end(src_y_16);
+ free_aligned_buffer_page_end(src_u_16);
+ free_aligned_buffer_page_end(src_v_16);
+
+ return max_diff;
}
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
-// 2 is chroma subsample
+// 2 is chroma subsample.
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
@@ -294,11 +271,12 @@ static int TestFilter_16(int src_width,
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, DISABLED_ScaleDownBy##name##_##filter##_16) { \
+ TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \
int diff = TestFilter_16( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
- kFilter##filter, benchmark_iterations_); \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
@@ -335,26 +313,26 @@ TEST_FACTOR(3, 1, 3, 0)
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, \
- DISABLED_##name##To##width##x##height##_##filter##_16) { \
+ TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \
- height, kFilter##filter, benchmark_iterations_); \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, \
- DISABLED_##name##From##width##x##height##_##filter##_16) { \
+ TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
- benchmark_iterations_); \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
// Test scale to a specified size with all 4 filters.
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(name, width, height, None, 0) \
- TEST_SCALETO1(name, width, height, Linear, 0) \
- TEST_SCALETO1(name, width, height, Bilinear, 0) \
- TEST_SCALETO1(name, width, height, Box, 0)
+ TEST_SCALETO1(name, width, height, Linear, 3) \
+ TEST_SCALETO1(name, width, height, Bilinear, 3) \
+ TEST_SCALETO1(name, width, height, Box, 3)
TEST_SCALETO(Scale, 1, 1)
TEST_SCALETO(Scale, 320, 240)
@@ -366,7 +344,7 @@ TEST_SCALETO(Scale, 1280, 720)
#undef TEST_SCALETO
#ifdef HAS_SCALEROWDOWN2_SSSE3
-TEST_F(LibYUVScaleTest, TestScaleOdd) {
+TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
SIMD_ALIGNED(uint8 orig_pixels[128 * 2]);
SIMD_ALIGNED(uint8 dst_pixels_opt[64]);
SIMD_ALIGNED(uint8 dst_pixels_c[64]);
@@ -374,78 +352,83 @@ TEST_F(LibYUVScaleTest, TestScaleOdd) {
memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
- // TL
- orig_pixels[0] = 255u;
- orig_pixels[1] = 0u;
- orig_pixels[128 + 0] = 0u;
- orig_pixels[128 + 1] = 0u;
- // TR
- orig_pixels[2] = 0u;
- orig_pixels[3] = 100u;
- orig_pixels[128 + 2] = 0u;
- orig_pixels[128 + 3] = 0u;
- // BL
- orig_pixels[4] = 0u;
- orig_pixels[5] = 0u;
- orig_pixels[128 + 4] = 50u;
- orig_pixels[128 + 5] = 0u;
- // BR
- orig_pixels[6] = 0u;
- orig_pixels[7] = 0u;
- orig_pixels[128 + 6] = 0u;
- orig_pixels[128 + 7] = 20u;
- // Odd
- orig_pixels[126] = 4u;
- orig_pixels[127] = 255u;
- orig_pixels[128 + 126] = 16u;
- orig_pixels[128 + 127] = 255u;
-
- // Test regular half size.
- ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
-
- EXPECT_EQ(64u, dst_pixels_c[0]);
- EXPECT_EQ(25u, dst_pixels_c[1]);
- EXPECT_EQ(13u, dst_pixels_c[2]);
- EXPECT_EQ(5u, dst_pixels_c[3]);
- EXPECT_EQ(0u, dst_pixels_c[4]);
- EXPECT_EQ(133u, dst_pixels_c[63]);
-
- // Test Odd width version - Last pixel is just 1 horizontal pixel.
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
-
- EXPECT_EQ(64u, dst_pixels_c[0]);
- EXPECT_EQ(25u, dst_pixels_c[1]);
- EXPECT_EQ(13u, dst_pixels_c[2]);
- EXPECT_EQ(5u, dst_pixels_c[3]);
- EXPECT_EQ(0u, dst_pixels_c[4]);
- EXPECT_EQ(10u, dst_pixels_c[63]);
-
- // Test one pixel less, should skip the last pixel.
- memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
-
- EXPECT_EQ(64u, dst_pixels_c[0]);
- EXPECT_EQ(25u, dst_pixels_c[1]);
- EXPECT_EQ(13u, dst_pixels_c[2]);
- EXPECT_EQ(5u, dst_pixels_c[3]);
- EXPECT_EQ(0u, dst_pixels_c[4]);
- EXPECT_EQ(0u, dst_pixels_c[63]);
-
- // Test regular half size SSSE3.
- ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
-
- EXPECT_EQ(64u, dst_pixels_opt[0]);
- EXPECT_EQ(25u, dst_pixels_opt[1]);
- EXPECT_EQ(13u, dst_pixels_opt[2]);
- EXPECT_EQ(5u, dst_pixels_opt[3]);
- EXPECT_EQ(0u, dst_pixels_opt[4]);
- EXPECT_EQ(133u, dst_pixels_opt[63]);
-
- // Compare C and SSSE3 match.
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
- ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
- for (int i = 0; i < 64; ++i) {
- EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ if (!has_ssse3) {
+ printf("Warning SSSE3 not detected; Skipping test.\n");
+ } else {
+ // TL.
+ orig_pixels[0] = 255u;
+ orig_pixels[1] = 0u;
+ orig_pixels[128 + 0] = 0u;
+ orig_pixels[128 + 1] = 0u;
+ // TR.
+ orig_pixels[2] = 0u;
+ orig_pixels[3] = 100u;
+ orig_pixels[128 + 2] = 0u;
+ orig_pixels[128 + 3] = 0u;
+ // BL.
+ orig_pixels[4] = 0u;
+ orig_pixels[5] = 0u;
+ orig_pixels[128 + 4] = 50u;
+ orig_pixels[128 + 5] = 0u;
+ // BR.
+ orig_pixels[6] = 0u;
+ orig_pixels[7] = 0u;
+ orig_pixels[128 + 6] = 0u;
+ orig_pixels[128 + 7] = 20u;
+ // Odd.
+ orig_pixels[126] = 4u;
+ orig_pixels[127] = 255u;
+ orig_pixels[128 + 126] = 16u;
+ orig_pixels[128 + 127] = 255u;
+
+ // Test regular half size.
+ ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
+
+ EXPECT_EQ(64u, dst_pixels_c[0]);
+ EXPECT_EQ(25u, dst_pixels_c[1]);
+ EXPECT_EQ(13u, dst_pixels_c[2]);
+ EXPECT_EQ(5u, dst_pixels_c[3]);
+ EXPECT_EQ(0u, dst_pixels_c[4]);
+ EXPECT_EQ(133u, dst_pixels_c[63]);
+
+ // Test Odd width version - Last pixel is just 1 horizontal pixel.
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+
+ EXPECT_EQ(64u, dst_pixels_c[0]);
+ EXPECT_EQ(25u, dst_pixels_c[1]);
+ EXPECT_EQ(13u, dst_pixels_c[2]);
+ EXPECT_EQ(5u, dst_pixels_c[3]);
+ EXPECT_EQ(0u, dst_pixels_c[4]);
+ EXPECT_EQ(10u, dst_pixels_c[63]);
+
+ // Test one pixel less, should skip the last pixel.
+ memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
+
+ EXPECT_EQ(64u, dst_pixels_c[0]);
+ EXPECT_EQ(25u, dst_pixels_c[1]);
+ EXPECT_EQ(13u, dst_pixels_c[2]);
+ EXPECT_EQ(5u, dst_pixels_c[3]);
+ EXPECT_EQ(0u, dst_pixels_c[4]);
+ EXPECT_EQ(0u, dst_pixels_c[63]);
+
+ // Test regular half size SSSE3.
+ ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+
+ EXPECT_EQ(64u, dst_pixels_opt[0]);
+ EXPECT_EQ(25u, dst_pixels_opt[1]);
+ EXPECT_EQ(13u, dst_pixels_opt[2]);
+ EXPECT_EQ(5u, dst_pixels_opt[3]);
+ EXPECT_EQ(0u, dst_pixels_opt[4]);
+ EXPECT_EQ(133u, dst_pixels_opt[63]);
+
+ // Compare C and SSSE3 match.
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+ ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+ for (int i = 0; i < 64; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
}
}
#endif // HAS_SCALEROWDOWN2_SSSE3
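
The expected values in the test above follow directly from the 2x2 box filter: each output byte is the rounded average of a 2x2 block, and the odd-width variant averages only the single final column. The first expectations and the last odd-width expectation can be reproduced by hand (a sketch of the arithmetic, not the library kernels):

#include <cstdio>

// Rounded average of a 2x2 block, as a box downscale by 2 computes it.
static int Box2x2(int tl, int tr, int bl, int br) {
  return (tl + tr + bl + br + 2) >> 2;
}

int main() {
  // First output pixel: 255 in the top-left corner, zeros elsewhere.
  std::printf("%d\n", Box2x2(255, 0, 0, 0));  // 64, matches dst_pixels_c[0]
  // Next outputs from the TR/BL/BR corners.
  std::printf("%d %d %d\n", Box2x2(0, 100, 0, 0), Box2x2(0, 0, 50, 0),
              Box2x2(0, 0, 0, 20));            // 25 13 5
  // Regular last pixel averages columns 126 and 127 of both rows.
  std::printf("%d\n", Box2x2(4, 255, 16, 255));  // 133
  // Odd-width last pixel averages only column 126 of both rows.
  std::printf("%d\n", (4 + 16 + 1) >> 1);        // 10
  return 0;
}
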
@@ -460,7 +443,7 @@ extern "C" void ScaleRowUp2_16_C(const uint16* src_ptr,
int dst_width);
TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
- SIMD_ALIGNED(uint16 orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun
+ SIMD_ALIGNED(uint16 orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
SIMD_ALIGNED(uint16 dst_pixels_c[1280]);
@@ -531,4 +514,101 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
EXPECT_EQ(dst_pixels_c[1279], 3839);
}
+// Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
+// difference.
+// 0 = exact.
+static int TestPlaneFilter_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i;
+ int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int src_stride_y = Abs(src_width);
+ int dst_y_plane_size = dst_width * dst_height;
+ int dst_stride_y = dst_width;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+ align_buffer_page_end(dst_y_8, dst_y_plane_size);
+ align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+ uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
+ uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
+
+ MemRandomize(src_y, src_y_plane_size);
+ memset(dst_y_8, 0, dst_y_plane_size);
+ memset(dst_y_16, 1, dst_y_plane_size * 2);
+
+ for (i = 0; i < src_y_plane_size; ++i) {
+ p_src_y_16[i] = src_y[i] & 255;
+ }
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
+ dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+
+ for (i = 0; i < benchmark_iterations; ++i) {
+ ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
+ dst_stride_y, dst_width, dst_height, f);
+ }
+
+ // Expect an exact match.
+ int max_diff = 0;
+ for (i = 0; i < dst_y_plane_size; ++i) {
+ int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_8);
+ free_aligned_buffer_page_end(dst_y_16);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_y_16);
+
+ return max_diff;
+}
+
+// The following adjustments in dimensions ensure the scale factor will be
+// exactly achieved.
+// 2 is chroma subsample.
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
+#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
+
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+ TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) { \
+ int diff = TestPlaneFilter_16( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
+// filtering uses different fixed point implementations for SSSE3, Neon and C.
+#define TEST_FACTOR(name, nom, denom, boxdiff) \
+ TEST_FACTOR1(name, None, nom, denom, 0) \
+ TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
+ TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
+ TEST_FACTOR1(name, Box, nom, denom, boxdiff)
+
+TEST_FACTOR(2, 1, 2, 0)
+TEST_FACTOR(4, 1, 4, 0)
+TEST_FACTOR(8, 1, 8, 0)
+TEST_FACTOR(3by4, 3, 4, 1)
+TEST_FACTOR(3by8, 3, 8, 1)
+TEST_FACTOR(3, 1, 3, 0)
+#undef TEST_FACTOR1
+#undef TEST_FACTOR
+#undef SX
+#undef DX
} // namespace libyuv
diff --git a/chromium/third_party/libyuv/unit_test/unit_test.h b/chromium/third_party/libyuv/unit_test/unit_test.h
index 87201b11ddb..6454389d52d 100644
--- a/chromium/third_party/libyuv/unit_test/unit_test.h
+++ b/chromium/third_party/libyuv/unit_test/unit_test.h
@@ -69,19 +69,15 @@ static inline bool SizeValid(int src_width,
return true;
}
-#define align_buffer_page_end(var, size) \
- uint8* var; \
- uint8* var##_mem; \
- var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \
- var = (uint8*)((intptr_t)(var##_mem + \
- (((size) + 4095 + 63) & /* NOLINT */ \
- ~4095) - \
- (size)) & \
- ~63);
+#define align_buffer_page_end(var, size) \
+ uint8* var##_mem = \
+ reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \
+ uint8* var = reinterpret_cast<uint8*>( \
+ (intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - (size)) & ~63)
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \
- var = 0;
+ var = 0
#ifdef WIN32
static inline double get_time() {
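
The reflowed align_buffer_page_end macro keeps its original behaviour: it over-allocates to a page multiple plus slack, then positions var so that the buffer ends within 63 bytes of the end of the allocation while its start is rounded down to a 64-byte boundary, pushing overreads toward the following page. The pointer arithmetic for a hypothetical 100-byte request, spelled out (a sketch assuming a 4096-byte page, as the macro does):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main() {
  const int size = 100;
  // Same rounding as the macro: (size + 4095 + 63) masked down to a page.
  size_t alloc = (size + 4095 + 63) & ~static_cast<size_t>(4095);  // 4096
  uint8_t* mem = static_cast<uint8_t*>(malloc(alloc));
  // Start near the end of the allocation, rounded down to 64 bytes.
  uint8_t* var = reinterpret_cast<uint8_t*>(
      reinterpret_cast<intptr_t>(mem + alloc - size) &
      ~static_cast<intptr_t>(63));
  std::printf("alloc=%zu, buffer start offset=%td, end offset=%td of %zu\n",
              alloc, var - mem, (var + size) - mem, alloc);
  free(mem);
  return 0;
}
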
diff --git a/chromium/third_party/libyuv/util/cpuid.c b/chromium/third_party/libyuv/util/cpuid.c
index ec0217a65b6..9ff618e0d28 100644
--- a/chromium/third_party/libyuv/util/cpuid.c
+++ b/chromium/third_party/libyuv/util/cpuid.c
@@ -22,6 +22,9 @@ int main(int argc, const char* argv[]) {
int has_arm = TestCpuFlag(kCpuHasARM);
int has_mips = TestCpuFlag(kCpuHasMIPS);
int has_x86 = TestCpuFlag(kCpuHasX86);
+ (void)argc;
+ (void)argv;
+
#if defined(__i386__) || defined(__x86_64__) || \
defined(_M_IX86) || defined(_M_X64)
if (has_x86) {
@@ -76,20 +79,32 @@ int main(int argc, const char* argv[]) {
int has_sse42 = TestCpuFlag(kCpuHasSSE42);
int has_avx = TestCpuFlag(kCpuHasAVX);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
- int has_avx3 = TestCpuFlag(kCpuHasAVX3);
int has_erms = TestCpuFlag(kCpuHasERMS);
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
- int has_f16c = TestCpuFlag(kCpuHasF16C);
+ int has_f16c = TestCpuFlag(kCpuHasF16C);
+ int has_gfni = TestCpuFlag(kCpuHasGFNI);
+ int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
+ int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
+ int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
+ int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
+ int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
+ int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
printf("Has SSE2 %x\n", has_sse2);
printf("Has SSSE3 %x\n", has_ssse3);
printf("Has SSE4.1 %x\n", has_sse41);
printf("Has SSE4.2 %x\n", has_sse42);
printf("Has AVX %x\n", has_avx);
printf("Has AVX2 %x\n", has_avx2);
- printf("Has AVX3 %x\n", has_avx3);
printf("Has ERMS %x\n", has_erms);
printf("Has FMA3 %x\n", has_fma3);
printf("Has F16C %x\n", has_f16c);
+ printf("Has GFNI %x\n", has_gfni);
+ printf("Has AVX512BW %x\n", has_avx512bw);
+ printf("Has AVX512VL %x\n", has_avx512vl);
+ printf("Has AVX512VBMI %x\n", has_avx512vbmi);
+ printf("Has AVX512VBMI2 %x\n", has_avx512vbmi2);
+ printf("Has AVX512VBITALG %x\n", has_avx512vbitalg);
+ printf("Has AVX512VPOPCNTDQ %x\n", has_avx512vpopcntdq);
}
return 0;
}