diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-08-30 10:22:43 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-08-30 12:36:28 +0000 |
commit | 271a6c3487a14599023a9106329505597638d793 (patch) | |
tree | e040d58ffc86c1480b79ca8528020ca9ec919bf8 /chromium/third_party/libyuv | |
parent | 7b2ffa587235a47d4094787d72f38102089f402a (diff) | |
download | qtwebengine-chromium-271a6c3487a14599023a9106329505597638d793.tar.gz |
BASELINE: Update Chromium to 77.0.3865.59
Change-Id: I1e89a5f3b009a9519a6705102ad65c92fe736f21
Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/third_party/libyuv')
42 files changed, 2386 insertions, 408 deletions
diff --git a/chromium/third_party/libyuv/.gn b/chromium/third_party/libyuv/.gn index fc66637f362..cdae6832370 100644 --- a/chromium/third_party/libyuv/.gn +++ b/chromium/third_party/libyuv/.gn @@ -36,4 +36,7 @@ default_args = { # LibYUV relies on Chromium's Android test infrastructure. use_cxx11_on_android = false + + # https://bugs.chromium.org/p/libyuv/issues/detail?id=826 + ios_deployment_target = "10.0" } diff --git a/chromium/third_party/libyuv/Android.bp b/chromium/third_party/libyuv/Android.bp index 691686aad33..ea76f9435be 100644 --- a/chromium/third_party/libyuv/Android.bp +++ b/chromium/third_party/libyuv/Android.bp @@ -69,6 +69,7 @@ cc_library { // with libyuv (b/37646797) cc_library_static { name: "libyuv_static", + vendor_available: true, whole_static_libs: ["libyuv"], } diff --git a/chromium/third_party/libyuv/DEPS b/chromium/third_party/libyuv/DEPS index ac20e06276c..c5f81b86641 100644 --- a/chromium/third_party/libyuv/DEPS +++ b/chromium/third_party/libyuv/DEPS @@ -1,7 +1,7 @@ vars = { 'chromium_git': 'https://chromium.googlesource.com', - 'chromium_revision': '35b72bf255d6519506b7e732f9c74205d2ab452d', - 'swarming_revision': '486c9b53c4d54dd4b95bb6ce0e31160e600dfc11', + 'chromium_revision': '4476bd69d1c8e4e1cde8633d3b33c992f7d3a6d0', + 'swarming_revision': '0e3e1c4dc4e79f25a5b58fcbc135dc93183c0c54', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling lss # and whatever else without interference from each other. @@ -9,34 +9,69 @@ vars = { # Three lines of non-changing comments so that # the commit queue can handle CLs rolling catapult # and whatever else without interference from each other. - 'catapult_revision': '0d25dda9b148bcd2dad9e1080b1dc57eaf9d2c2a', + 'catapult_revision': 'a24a725f7834c16b3628bfb63f349b3480bf9592', + # the commit queue can handle CLs rolling android_sdk_build-tools_version + # and whatever else without interference from each other. 
+ 'android_sdk_build-tools_version': 'DLK621q5_Bga5EsOr7cp6bHWWxFKx6UHLu_Ix_m3AckC', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_emulator_version + # and whatever else without interference from each other. + 'android_sdk_emulator_version': 'ki7EDQRAiZAUYlnTWR1XmI6cJTk65fJ-DNZUU1zrtS8C', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_extras_version + # and whatever else without interference from each other. + 'android_sdk_extras_version': 'iIwhhDox5E-mHgwUhCz8JACWQCpUjdqt5KTY9VLugKQC', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_patcher_version + # and whatever else without interference from each other. + 'android_sdk_patcher_version': 'I6FNMhrXlpB-E1lOhMlvld7xt9lBVNOO83KIluXDyA0C', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_platform-tools_version + # and whatever else without interference from each other. + 'android_sdk_platform-tools_version': '4Y2Cb2LGzoc-qt-oIUIlhySotJaKeE3ELFedSVe6Uk8C', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_platforms_version + # and whatever else without interference from each other. + 'android_sdk_platforms_version': 'Kg2t9p0YnQk8bldUv4VA3o156uPXLUfIFAmVZ-Gm5ewC', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_sources_version + # and whatever else without interference from each other. + 'android_sdk_sources_version': 'K9uEn3JvNELEVjjVK_GQD3ZQD3rqAnJSxCWxjmUmRkgC', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_tools_version + # and whatever else without interference from each other. 
+ 'android_sdk_tools_version': 'wYcRQC2WHsw2dKWs4EA7fw9Qsyzu1ds1_fRjKmGxe5QC', + # Three lines of non-changing comments so that + # the commit queue can handle CLs rolling android_sdk_tools-lint_version + # and whatever else without interference from each other. + 'android_sdk_tools-lint_version': '89hXqZYzCum3delB5RV7J_QyWkaRodqdtQS0s3LMh3wC', } deps = { 'src/build': - Var('chromium_git') + '/chromium/src/build' + '@' + 'f79db013c75bff172913707cd762eba847838fea', + Var('chromium_git') + '/chromium/src/build' + '@' + '669e41d6f18842ed5740449662a71b715dc607c6', 'src/buildtools': - Var('chromium_git') + '/chromium/buildtools.git' + '@' + '9a90d9aaadeb5e04327ed05775f45132e4b3523f', + Var('chromium_git') + '/chromium/buildtools.git' + '@' + '0e1cbc4eab6861b0c84bf2ed9a3c4b7aa2063819', 'src/testing': - Var('chromium_git') + '/chromium/src/testing' + '@' + 'd2fde4ae5b8d0a5021e6f79d2f4a62e83ba348bc', + Var('chromium_git') + '/chromium/src/testing' + '@' + 'b1c6aeebeabcc177a83ff0a33dc6c3ab03d4aa94', 'src/third_party': - Var('chromium_git') + '/chromium/src/third_party' + '@' + 'f931bb4f2bdcb327d066052df1914cab4bd68c50', + Var('chromium_git') + '/chromium/src/third_party' + '@' + 'be3e0fc18f2e9ea14d0e9369e539eae5986335fd', 'src/third_party/catapult': Var('chromium_git') + '/catapult.git' + '@' + Var('catapult_revision'), 'src/third_party/colorama/src': Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8', 'src/third_party/freetype/src': - Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '578bcf103a12fb742cdb314565819011d1ac12a7', + Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'd01e28f41f8810c8ea422b854f8722659589fa99', 'src/third_party/googletest/src': - Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'd5266326752f0a1dadbd310932d8f4fd8c3c5e7d', + Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 
'879ac092fde0a19e1b3a61b2546b2a422b1528bc', 'src/third_party/harfbuzz-ng/src': - Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '2b76767bf572364d3d647cdd139f2044a7ad06b2', + Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '26c5b54fb09fb45e02c9c4618bcea4958c698953', 'src/third_party/libjpeg_turbo': - Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'a1750dbc79a8792dde3d3f7d7d8ac28ba01ac9dd', + Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '61a2bbaa9aec89cb2c882d87ace6aba9aee49bb9', 'src/third_party/yasm/source/patched-yasm': Var('chromium_git') + '/chromium/deps/yasm/patched-yasm.git' + '@' + '720b70524a4424b15fc57e82263568c8ba0496ad', 'src/tools': - Var('chromium_git') + '/chromium/src/tools' + '@' + 'f2c6ed916b94176158763400de308c2afd56b259', + Var('chromium_git') + '/chromium/src/tools' + '@' + '419541c8352b3b75a99c9a5a7c0d1e7b92f3fcf7', 'src/tools/swarming_client': Var('chromium_git') + '/infra/luci/client-py.git' + '@' + Var('swarming_revision'), @@ -69,7 +104,7 @@ deps = { 'condition': 'checkout_android', }, 'src/base': { - 'url': Var('chromium_git') + '/chromium/src/base' + '@' + '6c0497f398c5f6e6af0c66fbf4d77e875eb3f2b1', + 'url': Var('chromium_git') + '/chromium/src/base' + '@' + '162a5d66ad148f26bbbe6b6ecaf5c1bafa2173e6', 'condition': 'checkout_android', }, 'src/third_party/bazel': { @@ -93,7 +128,7 @@ deps = { 'dep_type': 'cipd', }, 'src/third_party/android_ndk': { - 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '5cd86312e794bdf542a3685c6f10cbb96072990b', + 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '4e2cea441bfd43f0863d14f57b1e1844260b9884', 'condition': 'checkout_android', }, 'src/third_party/android_support_test_runner': { @@ -107,9 +142,61 @@ deps = { 'dep_type': 'cipd', }, 'src/third_party/android_tools': { - 'url': Var('chromium_git') + '/android_tools.git' + '@' + '130499e25286f4d56acafa252fee09f3cc595c49', + 'url': Var('chromium_git') + 
'/android_tools.git' + '@' + 'e958d6ea74442d4e0849bb8a018d215a0e78981d', 'condition': 'checkout_android', }, + 'src/third_party/android_sdk/public': { + 'packages': [ + { + 'package': 'chromium/third_party/android_sdk/public/build-tools', + 'version': Var('android_sdk_build-tools_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/emulator', + 'version': Var('android_sdk_emulator_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/extras', + 'version': Var('android_sdk_extras_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/patcher', + 'version': Var('android_sdk_patcher_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/platform-tools', + 'version': Var('android_sdk_platform-tools_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/platforms', + 'version': Var('android_sdk_platforms_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/sources', + 'version': Var('android_sdk_sources_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/tools', + 'version': Var('android_sdk_tools_version'), + }, + { + 'package': 'chromium/third_party/android_sdk/public/tools-lint', + 'version': Var('android_sdk_tools-lint_version'), + }, + ], + 'condition': 'checkout_android_native_support', + 'dep_type': 'cipd', + }, + 'src/third_party/android_build_tools/aapt2': { + 'packages': [ + { + 'package': 'chromium/third_party/android_tools_aapt2', + 'version': 'version:3.2.0-alpha18-4804415-cr0', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, 'src/third_party/byte_buddy': { 'packages': [ { @@ -163,7 +250,7 @@ deps = { 'dep_type': 'cipd', }, 'src/third_party/icu': { - 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + '297a4dd02b9d36c92ab9b4f121e433c9c3bc14f8', + 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'd65301491c513d49163ad29c853eb85c02c8d5b4', }, 'src/third_party/icu4j': { 
'packages': [ @@ -227,6 +314,16 @@ deps = { 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/proguard': { + 'packages': [ + { + 'package': 'chromium/third_party/proguard', + 'version': '3bd778c422ea5496de2ef25c007a517dbb5ce5ca', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, 'src/third_party/requests/src': { 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'f172b30356d821d180fa4ecfa3e71c7274a32de4', 'condition': 'checkout_android', @@ -272,7 +369,7 @@ deps = { # iOS deps: 'src/ios': { - 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '8e45eb00dffadde7e0669a881991e237b0b7a8eb', + 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '44be3c093cf2db7ab4cf1997d6a1a07722f1f391', 'condition': 'checkout_ios' }, @@ -747,6 +844,17 @@ deps = { 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_squareup_javapoet': { + 'packages': [ + { + 'package': 'chromium/third_party/android_deps/libs/com_squareup_javapoet', + 'version': 'version:1.11.0-cr0', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, + # === ANDROID_DEPS Generated Code End === } @@ -971,12 +1079,10 @@ hooks = [ # such dependencies we share with Chromium. { # This downloads SDK extras and puts them in the - # third_party/android_tools/sdk/extras directory. + # third_party/android_sdk/public/extras directory. 'name': 'sdkextras', 'condition': 'checkout_android', 'pattern': '.', - # When adding a new sdk extras package to download, add the package - # directory and zip file to .gitignore in third_party/android_tools. 'action': ['vpython', 'src/build/android/play_services/update.py', 'download' @@ -987,6 +1093,4 @@ hooks = [ recursedeps = [ # buildtools provides clang_format, libc++, and libc++abi. 'src/buildtools', - # android_tools manages the NDK. 
- 'src/third_party/android_tools', ] diff --git a/chromium/third_party/libyuv/README.chromium b/chromium/third_party/libyuv/README.chromium index 4a239b3cbf0..bddc20238d1 100644 --- a/chromium/third_party/libyuv/README.chromium +++ b/chromium/third_party/libyuv/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1722 +Version: 1732 License: BSD License File: LICENSE diff --git a/chromium/third_party/libyuv/include/libyuv/convert.h b/chromium/third_party/libyuv/include/libyuv/convert.h index d8b47a83848..f571142fab3 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert.h +++ b/chromium/third_party/libyuv/include/libyuv/convert.h @@ -226,6 +226,28 @@ int UYVYToI420(const uint8_t* src_uyvy, int width, int height); +// Convert AYUV to NV12. +LIBYUV_API +int AYUVToNV12(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert AYUV to NV21. +LIBYUV_API +int AYUVToNV21(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); + // Convert M420 to I420. LIBYUV_API int M420ToI420(const uint8_t* src_m420, @@ -322,6 +344,19 @@ int RGB24ToI420(const uint8_t* src_rgb24, int width, int height); +// RGB little endian (bgr in memory) to J420. +LIBYUV_API +int RGB24ToJ420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + // RGB big endian (rgb in memory) to I420. LIBYUV_API int RAWToI420(const uint8_t* src_raw, @@ -374,6 +409,15 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, int width, int height); +// RGB little endian (bgr in memory) to J400. 
+LIBYUV_API +int RGB24ToJ400(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height); + #ifdef HAVE_JPEG // src_width/height provided by capture. // dst_width/height for clipping determine final size. diff --git a/chromium/third_party/libyuv/include/libyuv/convert_argb.h b/chromium/third_party/libyuv/include/libyuv/convert_argb.h index 891c57de680..e8ed1f59f33 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert_argb.h +++ b/chromium/third_party/libyuv/include/libyuv/convert_argb.h @@ -256,6 +256,7 @@ int NV21ToARGB(const uint8_t* src_y, int height); // Convert NV12 to ABGR. +LIBYUV_API int NV12ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -298,6 +299,17 @@ int NV21ToRGB24(const uint8_t* src_y, int width, int height); +// Convert NV21 to YUV24. +LIBYUV_API +int NV21ToYUV24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_yuv24, + int dst_stride_yuv24, + int width, + int height); + // Convert NV12 to RAW. LIBYUV_API int NV12ToRAW(const uint8_t* src_y, diff --git a/chromium/third_party/libyuv/include/libyuv/planar_functions.h b/chromium/third_party/libyuv/include/libyuv/planar_functions.h index 91137baba25..f6f5b3edd8e 100644 --- a/chromium/third_party/libyuv/include/libyuv/planar_functions.h +++ b/chromium/third_party/libyuv/include/libyuv/planar_functions.h @@ -224,6 +224,19 @@ int UYVYToNV12(const uint8_t* src_uyvy, int width, int height); +// Convert NV21 to NV12. 
+LIBYUV_API +int NV21ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + LIBYUV_API int YUY2ToY(const uint8_t* src_yuy2, int src_stride_yuy2, diff --git a/chromium/third_party/libyuv/include/libyuv/rotate.h b/chromium/third_party/libyuv/include/libyuv/rotate.h index 76b692be8b0..c64e0216d53 100644 --- a/chromium/third_party/libyuv/include/libyuv/rotate.h +++ b/chromium/third_party/libyuv/include/libyuv/rotate.h @@ -49,6 +49,24 @@ int I420Rotate(const uint8_t* src_y, int height, enum RotationMode mode); +// Rotate I444 frame. +LIBYUV_API +int I444Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + // Rotate NV12 input and store in I420. 
LIBYUV_API int NV12ToI420Rotate(const uint8_t* src_y, diff --git a/chromium/third_party/libyuv/include/libyuv/row.h b/chromium/third_party/libyuv/include/libyuv/row.h index cc948878f66..5cbdaadee18 100644 --- a/chromium/third_party/libyuv/include/libyuv/row.h +++ b/chromium/third_party/libyuv/include/libyuv/row.h @@ -284,9 +284,8 @@ extern "C" { (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) #define HAS_ABGRTOAR30ROW_AVX2 #define HAS_ARGBTOAR30ROW_AVX2 -// Fix AVX2 b:118386049 segfault -//#define HAS_ARGBTORAWROW_AVX2 -//#define HAS_ARGBTORGB24ROW_AVX2 +#define HAS_ARGBTORAWROW_AVX2 +#define HAS_ARGBTORGB24ROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 #define HAS_I210TOAR30ROW_AVX2 @@ -296,6 +295,8 @@ extern "C" { #define HAS_I422TOYUY2ROW_AVX2 #define HAS_MERGEUVROW_16_AVX2 #define HAS_MULTIPLYROW_16_AVX2 +// TODO(fbarchard): Fix AVX2 version of YUV24 +// #define HAS_NV21TOYUV24ROW_AVX2 #endif // The following are available for AVX512 clang x86 platforms: @@ -304,8 +305,7 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ (defined(CLANG_HAS_AVX512)) -// Fix AVX2 b:118386049 segfault -//#define HAS_ARGBTORGB24ROW_AVX512VBMI +#define HAS_ARGBTORGB24ROW_AVX512VBMI #endif // The following are available on Neon platforms: @@ -332,6 +332,9 @@ extern "C" { #define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYROW_NEON +#define HAS_AYUVTOUVROW_NEON +#define HAS_AYUVTOVUROW_NEON +#define HAS_AYUVTOYROW_NEON #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON #define HAS_BYTETOFLOATROW_NEON @@ -357,6 +360,7 @@ extern "C" { #define HAS_NV12TORGB565ROW_NEON #define HAS_NV21TOARGBROW_NEON #define HAS_NV21TORGB24ROW_NEON +#define HAS_NV21TOYUV24ROW_NEON #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTOUVROW_NEON @@ -372,6 +376,7 @@ extern "C" { #define HAS_SETROW_NEON #define HAS_SPLITRGBROW_NEON #define 
HAS_SPLITUVROW_NEON +#define HAS_SWAPUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON @@ -817,6 +822,10 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); +void NV21ToYUV24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, @@ -2185,6 +2194,10 @@ void NV21ToRGB24Row_C(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); +void NV21ToYUV24Row_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); void YUY2ToARGBRow_C(const uint8_t* src_yuy2, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, @@ -2351,6 +2364,10 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); +void NV21ToYUV24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, @@ -2556,6 +2573,10 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -3029,6 +3050,10 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -3346,6 +3371,36 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* 
dst_v, int width); +void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width); +void AYUVToUVRow_C(const uint8_t* src_ayuv, + int stride_ayuv, + uint8_t* dst_uv, + int width); +void AYUVToVURow_C(const uint8_t* src_ayuv, + int stride_ayuv, + uint8_t* dst_vu, + int width); +void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width); +void AYUVToUVRow_NEON(const uint8_t* src_ayuv, + int stride_ayuv, + uint8_t* dst_uv, + int width); +void AYUVToVURow_NEON(const uint8_t* src_ayuv, + int stride_ayuv, + uint8_t* dst_vu, + int width); +void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width); +void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv, + int stride_ayuv, + uint8_t* dst_uv, + int width); +void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv, + int stride_ayuv, + uint8_t* dst_vu, + int width); void I422ToYUY2Row_C(const uint8_t* src_y, const uint8_t* src_u, diff --git a/chromium/third_party/libyuv/include/libyuv/scale.h b/chromium/third_party/libyuv/include/libyuv/scale.h index b937d348cab..23ba1634f78 100644 --- a/chromium/third_party/libyuv/include/libyuv/scale.h +++ b/chromium/third_party/libyuv/include/libyuv/scale.h @@ -97,6 +97,54 @@ int I420Scale_16(const uint16_t* src_y, int dst_height, enum FilterMode filtering); +// Scales a YUV 4:4:4 image from the src width and height to the +// dst width and height. +// If filtering is kFilterNone, a simple nearest-neighbor algorithm is +// used. This produces basic (blocky) quality at the fastest speed. +// If filtering is kFilterBilinear, interpolation is used to produce a better +// quality image, at the expense of speed. +// If filtering is kFilterBox, averaging is used to produce ever better +// quality image, at further expense of speed. 
+// Returns 0 if successful. + +LIBYUV_API +int I444Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + +LIBYUV_API +int I444Scale_16(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + #ifdef __cplusplus // Legacy API. Deprecated. LIBYUV_API diff --git a/chromium/third_party/libyuv/include/libyuv/version.h b/chromium/third_party/libyuv/include/libyuv/version.h index 1a38ba7d6dd..741ef34df42 100644 --- a/chromium/third_party/libyuv/include/libyuv/version.h +++ b/chromium/third_party/libyuv/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1722 +#define LIBYUV_VERSION 1732 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/chromium/third_party/libyuv/infra/config/OWNERS b/chromium/third_party/libyuv/infra/config/OWNERS deleted file mode 100644 index b61b29d6c25..00000000000 --- a/chromium/third_party/libyuv/infra/config/OWNERS +++ /dev/null @@ -1,3 +0,0 @@ -set noparent -agable@chromium.org -phoglund@chromium.org diff --git a/chromium/third_party/libyuv/infra/config/PRESUBMIT.py b/chromium/third_party/libyuv/infra/config/PRESUBMIT.py deleted file mode 100644 index 89eaa5192c2..00000000000 --- a/chromium/third_party/libyuv/infra/config/PRESUBMIT.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2018 The LibYuv Project Authors. All rights reserved. 
-# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. - - -def CheckChangeOnUpload(input_api, output_api): - return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api) - - -def CheckChangeOnCommit(input_api, output_api): - return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api) diff --git a/chromium/third_party/libyuv/infra/config/README.md b/chromium/third_party/libyuv/infra/config/README.md deleted file mode 100644 index c036d610c5f..00000000000 --- a/chromium/third_party/libyuv/infra/config/README.md +++ /dev/null @@ -1 +0,0 @@ -This directory contains configuration files for infra services. diff --git a/chromium/third_party/libyuv/infra/config/cq.cfg b/chromium/third_party/libyuv/infra/config/cq.cfg deleted file mode 100644 index 7bcc05955a3..00000000000 --- a/chromium/third_party/libyuv/infra/config/cq.cfg +++ /dev/null @@ -1,51 +0,0 @@ -# Commit Queue configuration file. The documentation of the format can be found -# at http://luci-config.appspot.com/schemas/projects/refs:cq.cfg. 
- -version: 1 -cq_status_url: "https://chromium-cq-status.appspot.com" -git_repo_url: "https://chromium.googlesource.com/libyuv/libyuv.git" - -gerrit {} - -verifiers { - gerrit_cq_ability { - committer_list: "project-libyuv-committers" - dry_run_access_list: "project-libyuv-tryjob-access" - } - - try_job { - buckets { - name: "luci.libyuv.try" - builders { name: "win" } - builders { name: "win_rel" } - builders { name: "win_x64_rel" } - builders { name: "win_clang" } - builders { name: "win_clang_rel" } - builders { name: "win_x64_clang_rel" } - builders { name: "mac" } - builders { name: "mac_rel" } - builders { name: "mac_asan" } - builders { name: "ios" } - builders { name: "ios_rel" } - builders { name: "ios_arm64" } - builders { name: "ios_arm64_rel" } - builders { name: "linux" } - builders { name: "linux_rel" } - builders { - name: "linux_gcc" - experiment_percentage: 100 - } - builders { name: "linux_memcheck" } - builders { name: "linux_tsan2" } - builders { name: "linux_asan" } - builders { name: "linux_msan" } - builders { name: "linux_ubsan" } - builders { name: "linux_ubsan_vptr" } - builders { name: "android" } - builders { name: "android_rel" } - builders { name: "android_arm64" } - builders { name: "android_x86" } - builders { name: "android_x64" } - } - } -} diff --git a/chromium/third_party/libyuv/source/compare.cc b/chromium/third_party/libyuv/source/compare.cc index 5aa3a4db86d..7f4828104fe 100644 --- a/chromium/third_party/libyuv/source/compare.cc +++ b/chromium/third_party/libyuv/source/compare.cc @@ -69,13 +69,13 @@ static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) { if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. return FOURCC_BGRA; } - if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. + if (argb[3] != 255) { // Fourth byte is not Alpha of 255, so not BGRA. return FOURCC_ARGB; } if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255. 
return FOURCC_BGRA; } - if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255. + if (argb[7] != 255) { // Second pixel fourth byte is not Alpha of 255. return FOURCC_ARGB; } argb += 8; diff --git a/chromium/third_party/libyuv/source/convert.cc b/chromium/third_party/libyuv/source/convert.cc index 4b8d0dc57f7..614fa48241e 100644 --- a/chromium/third_party/libyuv/source/convert.cc +++ b/chromium/third_party/libyuv/source/convert.cc @@ -880,6 +880,144 @@ int UYVYToI420(const uint8_t* src_uyvy, return 0; } +// Convert AYUV to NV12. +LIBYUV_API +int AYUVToNV12(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + int y; + void (*AYUVToUVRow)(const uint8_t* src_ayuv, int src_stride_ayuv, + uint8_t* dst_uv, int width) = AYUVToUVRow_C; + void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) = + AYUVToYRow_C; + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv; + src_stride_ayuv = -src_stride_ayuv; + } +// place holders for future intel code +#if defined(HAS_AYUVTOYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + AYUVToUVRow = AYUVToUVRow_Any_SSE2; + AYUVToYRow = AYUVToYRow_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + AYUVToUVRow = AYUVToUVRow_SSE2; + AYUVToYRow = AYUVToYRow_SSE2; + } + } +#endif +#if defined(HAS_AYUVTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + AYUVToUVRow = AYUVToUVRow_Any_AVX2; + AYUVToYRow = AYUVToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + AYUVToUVRow = AYUVToUVRow_AVX2; + AYUVToYRow = AYUVToYRow_AVX2; + } + } +#endif + +#if defined(HAS_AYUVTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + AYUVToYRow = AYUVToYRow_Any_NEON; + AYUVToUVRow = AYUVToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + AYUVToYRow = AYUVToYRow_NEON; + AYUVToUVRow = AYUVToUVRow_NEON; + } + } +#endif + + for (y = 0; y < height - 1; y += 2) { + 
AYUVToUVRow(src_ayuv, src_stride_ayuv, dst_uv, width); + AYUVToYRow(src_ayuv, dst_y, width); + AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width); + src_ayuv += src_stride_ayuv * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + AYUVToUVRow(src_ayuv, 0, dst_uv, width); + AYUVToYRow(src_ayuv, dst_y, width); + } + return 0; +} + +// Convert AYUV to NV21. +LIBYUV_API +int AYUVToNV21(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + int y; + void (*AYUVToVURow)(const uint8_t* src_ayuv, int src_stride_ayuv, + uint8_t* dst_vu, int width) = AYUVToVURow_C; + void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) = + AYUVToYRow_C; + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv; + src_stride_ayuv = -src_stride_ayuv; + } +// place holders for future intel code +#if defined(HAS_AYUVTOYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + AYUVToVURow = AYUVToVURow_Any_SSE2; + AYUVToYRow = AYUVToYRow_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + AYUVToVURow = AYUVToVURow_SSE2; + AYUVToYRow = AYUVToYRow_SSE2; + } + } +#endif +#if defined(HAS_AYUVTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + AYUVToVURow = AYUVToVURow_Any_AVX2; + AYUVToYRow = AYUVToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + AYUVToVURow = AYUVToVURow_AVX2; + AYUVToYRow = AYUVToYRow_AVX2; + } + } +#endif + +#if defined(HAS_AYUVTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + AYUVToYRow = AYUVToYRow_Any_NEON; + AYUVToVURow = AYUVToVURow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + AYUVToYRow = AYUVToYRow_NEON; + AYUVToVURow = AYUVToVURow_NEON; + } + } +#endif + + for (y = 0; y < height - 1; y += 2) { + AYUVToVURow(src_ayuv, src_stride_ayuv, dst_vu, width); + AYUVToYRow(src_ayuv, dst_y, width); + AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, 
width); + src_ayuv += src_stride_ayuv * 2; + dst_y += dst_stride_y * 2; + dst_vu += dst_stride_vu; + } + if (height & 1) { + AYUVToVURow(src_ayuv, 0, dst_vu, width); + AYUVToYRow(src_ayuv, dst_y, width); + } + return 0; +} + // Convert ARGB to I420. LIBYUV_API int ARGBToI420(const uint8_t* src_argb, @@ -1446,6 +1584,155 @@ int RGB24ToI420(const uint8_t* src_rgb24, return 0; } +// TODO(fbarchard): Use Matrix version to implement I420 and J420. +// Convert RGB24 to J420. +LIBYUV_API +int RGB24ToJ420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + int y; +#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGB24ToUVJRow_C; + void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = + RGB24ToYJRow_C; +#else + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RGB24ToARGBRow_C; + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYJRow_C; +#endif + if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; + src_stride_rgb24 = -src_stride_rgb24; + } + +// Neon version does direct RGB24 to YUV. 
+#if defined(HAS_RGB24TOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON; + RGB24ToYJRow = RGB24ToYJRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToYJRow = RGB24ToYJRow_NEON; + if (IS_ALIGNED(width, 16)) { + RGB24ToUVJRow = RGB24ToUVJRow_NEON; + } + } + } +#elif defined(HAS_RGB24TOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGB24ToUVJRow = RGB24ToUVJRow_Any_MSA; + RGB24ToYJRow = RGB24ToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_MSA; + RGB24ToUVJRow = RGB24ToUVJRow_MSA; + } + } +#elif defined(HAS_RGB24TOYJROW_MMI) + if (TestCpuFlag(kCpuHasMMI)) { + RGB24ToUVJRow = RGB24ToUVJRow_Any_MMI; + RGB24ToYJRow = RGB24ToYJRow_Any_MMI; + if (IS_ALIGNED(width, 8)) { + RGB24ToYJRow = RGB24ToYJRow_MMI; + if (IS_ALIGNED(width, 16)) { + RGB24ToUVJRow = RGB24ToUVJRow_MMI; + } + } + } +// Other platforms do intermediate conversion from RGB24 to ARGB. +#else +#if defined(HAS_RGB24TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; + ARGBToYJRow = ARGBToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#endif + + { +#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + // Allocate 2 rows of ARGB. 
+ const int kRowSize = (width * 4 + 31) & ~31; + align_buffer_64(row, kRowSize * 2); +#endif + + for (y = 0; y < height - 1; y += 2) { +#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); + RGB24ToYJRow(src_rgb24, dst_y, width); + RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); +#else + RGB24ToARGBRow(src_rgb24, row, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); + ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); + ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); +#endif + src_rgb24 += src_stride_rgb24 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { +#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); + RGB24ToYJRow(src_rgb24, dst_y, width); +#else + RGB24ToARGBRow(src_rgb24, row, width); + ARGBToUVJRow(row, 0, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); +#endif + } +#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + free_aligned_buffer_64(row); +#endif + } + return 0; +} + // Convert RAW to I420. LIBYUV_API int RAWToI420(const uint8_t* src_raw, @@ -2082,6 +2369,124 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, return 0; } +// Convert RGB24 to J400. 
+LIBYUV_API +int RGB24ToJ400(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height) { + int y; +#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) = + RGB24ToYJRow_C; +#else + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RGB24ToARGBRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = + ARGBToYJRow_C; +#endif + if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; + src_stride_rgb24 = -src_stride_rgb24; + } + +// Neon version does direct RGB24 to YUV. +#if defined(HAS_RGB24TOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToYJRow = RGB24ToYJRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToYJRow = RGB24ToYJRow_NEON; + } + } +#elif defined(HAS_RGB24TOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGB24ToYJRow = RGB24ToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_MSA; + } + } +#elif defined(HAS_RGB24TOYJROW_MMI) + if (TestCpuFlag(kCpuHasMMI)) { + RGB24ToYJRow = RGB24ToYJRow_Any_MMI; + if (IS_ALIGNED(width, 8)) { + RGB24ToYJRow = RGB24ToYJRow_MMI; + } + } +// Other platforms do intermediate conversion from RGB24 to ARGB. 
+#else +#if defined(HAS_RGB24TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#endif + + { +#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + // Allocate 2 rows of ARGB. + const int kRowSize = (width * 4 + 31) & ~31; + align_buffer_64(row, kRowSize * 2); +#endif + + for (y = 0; y < height - 1; y += 2) { +#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + RGB24ToYJRow(src_rgb24, dst_yj, width); + RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_yj + dst_stride_yj, width); +#else + RGB24ToARGBRow(src_rgb24, row, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); + ARGBToYJRow(row, dst_yj, width); + ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width); +#endif + src_rgb24 += src_stride_rgb24 * 2; + dst_yj += dst_stride_yj * 2; + } + if (height & 1) { +#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + RGB24ToYJRow(src_rgb24, dst_yj, width); +#else + RGB24ToARGBRow(src_rgb24, row, width); + ARGBToYJRow(row, dst_yj, width); +#endif + } +#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) + free_aligned_buffer_64(row); +#endif + } + return 0; +} + static void SplitPixels(const uint8_t* src_u, int src_pixel_stride_uv, uint8_t* dst_u, diff --git a/chromium/third_party/libyuv/source/convert_argb.cc 
b/chromium/third_party/libyuv/source/convert_argb.cc index b376a0f3876..540503330dc 100644 --- a/chromium/third_party/libyuv/source/convert_argb.cc +++ b/chromium/third_party/libyuv/source/convert_argb.cc @@ -1793,8 +1793,9 @@ int NV21ToARGB(const uint8_t* src_y, } // Convert NV12 to ABGR. -// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix. +// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix. // To swap the UV use NV12 instead of NV21.LIBYUV_API +LIBYUV_API int NV12ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -1998,6 +1999,54 @@ int NV21ToRAW(const uint8_t* src_y, dst_stride_raw, &kYvuI601Constants, width, height); } +// Convert NV21 to YUV24 +int NV21ToYUV24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_yuv24, + int dst_stride_yuv24, + int width, + int height) { + int y; + void (*NV21ToYUV24Row)(const uint8_t* src_y, const uint8_t* src_vu, + uint8_t* dst_yuv24, int width) = NV21ToYUV24Row_C; + if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_yuv24 = dst_yuv24 + (height - 1) * dst_stride_yuv24; + dst_stride_yuv24 = -dst_stride_yuv24; + } +#if defined(HAS_NV21TOYUV24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + NV21ToYUV24Row = NV21ToYUV24Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + NV21ToYUV24Row = NV21ToYUV24Row_NEON; + } + } +#endif +#if defined(HAS_NV21TOYUV24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + NV21ToYUV24Row = NV21ToYUV24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + NV21ToYUV24Row = NV21ToYUV24Row_AVX2; + } + } +#endif + for (y = 0; y < height; ++y) { + NV21ToYUV24Row(src_y, src_vu, dst_yuv24, width); + dst_yuv24 += dst_stride_yuv24; + src_y += src_stride_y; + if (y & 1) { + src_vu += src_stride_vu; + } + } + return 0; +} + // Convert M420 to ARGB. 
LIBYUV_API int M420ToARGB(const uint8_t* src_m420, diff --git a/chromium/third_party/libyuv/source/convert_from.cc b/chromium/third_party/libyuv/source/convert_from.cc index 706067bb29b..60140cb4e2e 100644 --- a/chromium/third_party/libyuv/source/convert_from.cc +++ b/chromium/third_party/libyuv/source/convert_from.cc @@ -670,7 +670,7 @@ static int I420ToRGB24Matrix(const uint8_t* src_y, #if defined(HAS_I422TORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { I422ToRGB24Row = I422ToRGB24Row_SSSE3; } } @@ -678,7 +678,7 @@ static int I420ToRGB24Matrix(const uint8_t* src_y, #if defined(HAS_I422TORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToRGB24Row = I422ToRGB24Row_Any_AVX2; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 32)) { I422ToRGB24Row = I422ToRGB24Row_AVX2; } } diff --git a/chromium/third_party/libyuv/source/convert_jpeg.cc b/chromium/third_party/libyuv/source/convert_jpeg.cc index 56a95c57707..f440c7c2e97 100644 --- a/chromium/third_party/libyuv/source/convert_jpeg.cc +++ b/chromium/third_party/libyuv/source/convert_jpeg.cc @@ -89,12 +89,12 @@ static void JpegI400ToI420(void* opaque, // Query size of MJPG in pixels. LIBYUV_API -int MJPGSize(const uint8_t* sample, - size_t sample_size, +int MJPGSize(const uint8_t* src_mjpg, + size_t src_size_mjpg, int* width, int* height) { MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg); if (ret) { *width = mjpeg_decoder.GetWidth(); *height = mjpeg_decoder.GetHeight(); @@ -107,8 +107,8 @@ int MJPGSize(const uint8_t* sample, // TODO(fbarchard): review src_width and src_height requirement. dst_width and // dst_height may be enough. 
LIBYUV_API -int MJPGToI420(const uint8_t* sample, - size_t sample_size, +int MJPGToI420(const uint8_t* src_mjpg, + size_t src_size_mjpg, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, @@ -119,14 +119,14 @@ int MJPGToI420(const uint8_t* sample, int src_height, int dst_width, int dst_height) { - if (sample_size == kUnknownDataSize) { + if (src_size_mjpg == kUnknownDataSize) { // ERROR: MJPEG frame size unknown return -1; } // TODO(fbarchard): Port MJpeg to C. MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg); if (ret && (mjpeg_decoder.GetWidth() != src_width || mjpeg_decoder.GetHeight() != src_height)) { // ERROR: MJPEG frame has unexpected dimensions @@ -180,9 +180,9 @@ int MJPGToI420(const uint8_t* sample, ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dst_width, dst_height); } else { - // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. - // ERROR: Unable to convert MJPEG frame because format is not supported + // TODO(fbarchard): Implement conversion for any other + // colorspace/subsample factors that occur in practice. ERROR: Unable to + // convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; } @@ -249,8 +249,8 @@ static void JpegI400ToNV21(void* opaque, // MJPG (Motion JPeg) to NV21 LIBYUV_API -int MJPGToNV21(const uint8_t* sample, - size_t sample_size, +int MJPGToNV21(const uint8_t* src_mjpg, + size_t src_size_mjpg, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_vu, @@ -259,14 +259,14 @@ int MJPGToNV21(const uint8_t* sample, int src_height, int dst_width, int dst_height) { - if (sample_size == kUnknownDataSize) { + if (src_size_mjpg == kUnknownDataSize) { // ERROR: MJPEG frame size unknown return -1; } // TODO(fbarchard): Port MJpeg to C. 
MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg); if (ret && (mjpeg_decoder.GetWidth() != src_width || mjpeg_decoder.GetHeight() != src_height)) { // ERROR: MJPEG frame has unexpected dimensions @@ -382,22 +382,22 @@ static void JpegI400ToARGB(void* opaque, // TODO(fbarchard): review src_width and src_height requirement. dst_width and // dst_height may be enough. LIBYUV_API -int MJPGToARGB(const uint8_t* sample, - size_t sample_size, +int MJPGToARGB(const uint8_t* src_mjpg, + size_t src_size_mjpg, uint8_t* dst_argb, int dst_stride_argb, int src_width, int src_height, int dst_width, int dst_height) { - if (sample_size == kUnknownDataSize) { + if (src_size_mjpg == kUnknownDataSize) { // ERROR: MJPEG frame size unknown return -1; } // TODO(fbarchard): Port MJpeg to C. MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg); if (ret && (mjpeg_decoder.GetWidth() != src_width || mjpeg_decoder.GetHeight() != src_height)) { // ERROR: MJPEG frame has unexpected dimensions @@ -450,9 +450,9 @@ int MJPGToARGB(const uint8_t* sample, ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dst_width, dst_height); } else { - // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. - // ERROR: Unable to convert MJPEG frame because format is not supported + // TODO(fbarchard): Implement conversion for any other + // colorspace/subsample factors that occur in practice. 
ERROR: Unable to + // convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; } diff --git a/chromium/third_party/libyuv/source/mjpeg_decoder.cc b/chromium/third_party/libyuv/source/mjpeg_decoder.cc index eaf2530130b..80e381dd6b0 100644 --- a/chromium/third_party/libyuv/source/mjpeg_decoder.cc +++ b/chromium/third_party/libyuv/source/mjpeg_decoder.cc @@ -25,7 +25,8 @@ #endif #endif -struct FILE; // For jpeglib.h. + +#include <stdio.h> // For jpeglib.h. // C++ build requires extern C for jpeg internals. #ifdef __cplusplus @@ -427,7 +428,15 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) { } void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT - cinfo->src->next_input_byte += num_bytes; + jpeg_source_mgr* src = cinfo->src; + size_t bytes = static_cast<size_t>(num_bytes); + if (bytes > src->bytes_in_buffer) { + src->next_input_byte = nullptr; + src->bytes_in_buffer = 0; + } else { + src->next_input_byte += bytes; + src->bytes_in_buffer -= bytes; + } } void term_source(j_decompress_ptr cinfo) { diff --git a/chromium/third_party/libyuv/source/mjpeg_validate.cc b/chromium/third_party/libyuv/source/mjpeg_validate.cc index 80c2cc0cb9b..ba0a03ab9e5 100644 --- a/chromium/third_party/libyuv/source/mjpeg_validate.cc +++ b/chromium/third_party/libyuv/source/mjpeg_validate.cc @@ -18,10 +18,10 @@ extern "C" { #endif // Helper function to scan for EOI marker (0xff 0xd9). -static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) { - if (sample_size >= 2) { - const uint8_t* end = sample + sample_size - 1; - const uint8_t* it = sample; +static LIBYUV_BOOL ScanEOI(const uint8_t* src_mjpg, size_t src_size_mjpg) { + if (src_size_mjpg >= 2) { + const uint8_t* end = src_mjpg + src_size_mjpg - 1; + const uint8_t* it = src_mjpg; while (it < end) { // TODO(fbarchard): scan for 0xd9 instead. 
it = (const uint8_t*)(memchr(it, 0xff, end - it)); @@ -34,34 +34,35 @@ static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) { ++it; // Skip over current 0xff. } } - // ERROR: Invalid jpeg end code not found. Size sample_size + // ERROR: Invalid jpeg end code not found. Size src_size_mjpg return LIBYUV_FALSE; } // Helper function to validate the jpeg appears intact. -LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size) { +LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg) { // Maximum size that ValidateJpeg will consider valid. const size_t kMaxJpegSize = 0x7fffffffull; const size_t kBackSearchSize = 1024; - if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) { - // ERROR: Invalid jpeg size: sample_size + if (src_size_mjpg < 64 || src_size_mjpg > kMaxJpegSize || !src_mjpg) { + // ERROR: Invalid jpeg size: src_size_mjpg return LIBYUV_FALSE; } - if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker + // SOI marker + if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8 || src_mjpg[2] != 0xff) { // ERROR: Invalid jpeg initial start code return LIBYUV_FALSE; } // Look for the End Of Image (EOI) marker near the end of the buffer. - if (sample_size > kBackSearchSize) { - if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) { + if (src_size_mjpg > kBackSearchSize) { + if (ScanEOI(src_mjpg + src_size_mjpg - kBackSearchSize, kBackSearchSize)) { return LIBYUV_TRUE; // Success: Valid jpeg. } // Reduce search size for forward search. - sample_size = sample_size - kBackSearchSize + 1; + src_size_mjpg = src_size_mjpg - kBackSearchSize + 1; } // Step over SOI marker and scan for EOI. 
- return ScanEOI(sample + 2, sample_size - 2); + return ScanEOI(src_mjpg + 2, src_size_mjpg - 2); } #ifdef __cplusplus diff --git a/chromium/third_party/libyuv/source/planar_functions.cc b/chromium/third_party/libyuv/source/planar_functions.cc index b49bf0a0b4b..59e687afd61 100644 --- a/chromium/third_party/libyuv/source/planar_functions.cc +++ b/chromium/third_party/libyuv/source/planar_functions.cc @@ -440,7 +440,6 @@ void MergeUVPlane(const uint8_t* src_u, int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; - // Coalesce rows. // Negative height means invert the image. if (height < 0) { height = -height; @@ -504,6 +503,63 @@ void MergeUVPlane(const uint8_t* src_u, } } +// Convert NV21 to NV12. +LIBYUV_API +int NV21ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + int y; + void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = + SwapUVRow_C; + + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_vu || !dst_uv || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_vu = src_vu + (halfheight - 1) * src_stride_vu; + src_stride_y = -src_stride_y; + src_stride_vu = -src_stride_vu; + } + // Coalesce rows. 
+ if (src_stride_vu == halfwidth * 2 && dst_stride_uv == halfwidth * 2) { + halfwidth *= halfheight; + halfheight = 1; + src_stride_vu = dst_stride_uv = 0; + } + +#if defined(HAS_SWAPUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SwapUVRow = SwapUVRow_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + SwapUVRow = SwapUVRow_NEON; + } + } +#endif + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + for (y = 0; y < halfheight; ++y) { + SwapUVRow(src_vu, dst_uv, halfwidth); + src_vu += src_stride_vu; + dst_uv += dst_stride_uv; + } + return 0; +} + // Support function for NV12 etc RGB channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API diff --git a/chromium/third_party/libyuv/source/rotate.cc b/chromium/third_party/libyuv/source/rotate.cc index f28a06d38a9..d414186a5a0 100644 --- a/chromium/third_party/libyuv/source/rotate.cc +++ b/chromium/third_party/libyuv/source/rotate.cc @@ -482,6 +482,66 @@ int I420Rotate(const uint8_t* src_y, } LIBYUV_API +int I444Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum libyuv::RotationMode mode) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v) { + return -1; + } + + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case libyuv::kRotate0: + // copy frame + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + case libyuv::kRotate90: + RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + case libyuv::kRotate270: + RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + case libyuv::kRotate180: + RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + default: + break; + } + return -1; +} + +LIBYUV_API int NV12ToI420Rotate(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, diff --git a/chromium/third_party/libyuv/source/row_any.cc b/chromium/third_party/libyuv/source/row_any.cc index 031a8f6490e..ef89350ece4 100644 --- a/chromium/third_party/libyuv/source/row_any.cc +++ b/chromium/third_party/libyuv/source/row_any.cc @@ -286,7 +286,12 @@ ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15) #ifdef HAS_MERGEUVROW_MMI ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7) #endif - +#ifdef HAS_NV21TOYUV24ROW_NEON +ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 
1, 2, 3, 15) +#endif +#ifdef HAS_NV21TOYUV24ROW_AVX2 +ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31) +#endif // Math functions. #ifdef HAS_ARGBMULTIPLYROW_SSE2 ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3) @@ -702,6 +707,12 @@ ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) #ifdef HAS_UYVYTOYROW_MMI ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15) #endif +#ifdef HAS_AYUVTOYROW_NEON +ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15) +#endif +#ifdef HAS_SWAPUVROW_NEON +ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15) +#endif #ifdef HAS_RGB24TOARGBROW_NEON ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) #endif @@ -1381,6 +1392,37 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15) #endif #undef ANY12S +// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane. +// 128 byte row allows for 32 avx ARGB pixels. +#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu, \ + int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 3]); \ + memset(temp, 0, 128 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ + memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ + SS(r, UVSHIFT) * BPP); \ + if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \ + memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \ + BPP); \ + memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ + temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + } \ + ANY_SIMD(temp, 128, temp + 256, MASK + 1); \ + memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2); \ + } + +#ifdef HAS_AYUVTOVUROW_NEON +ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15) +ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15) +#endif +#undef 
/*
 * Patch context carried over verbatim from the original page text:
 *
 * ANY11S + #ifdef __cplusplus } // extern "C" } // namespace libyuv
 * diff --git a/chromium/third_party/libyuv/source/row_common.cc b/chromium/third_party/libyuv/source/row_common.cc
 * index 2bbc5adbf14..8ef1b1c534b 100644
 * --- a/chromium/third_party/libyuv/source/row_common.cc
 * +++ b/chromium/third_party/libyuv/source/row_common.cc
 * @@ -3231,6 +3231,106 @@ void GaussCol_C(const uint16_t* src0, } }
 */

// Convert biplanar NV21 to packed YUV24
// Each output pixel is 3 bytes in V,U,Y order. Two horizontally adjacent
// pixels share one VU pair; an odd trailing pixel uses the next VU pair.
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
    dst_yuv24[3] = src_vu[0];  // V
    dst_yuv24[4] = src_vu[1];  // U
    dst_yuv24[5] = src_y[1];   // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}

// Filter 2 rows of AYUV UV's (444) into UV (420).
// Source pixels are 4 bytes in V,U,Y,A order. The second row is found
// src_stride_ayuv bytes after the first (0 makes both rows the same row).
// Writes (width + 1) / 2 U,V pairs.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Output a row of UV values, filtering 2x2 rows of AYUV.
  int x;
  // Stop before an odd trailing pixel so the 2x2 filter never reads a pixel
  // beyond the row; the trailing pixel is handled separately below.
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    src_ayuv += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    // Last column: average the two rows only. U first, then V, matching the
    // order produced by the loop above.
    dst_uv[0] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
  }
}

// Filter 2 rows of AYUV UV's (444) into VU (420).
// Same filter as AYUVToUVRow_C with the output pair order swapped to V,U.
// Writes (width + 1) / 2 V,U pairs.
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Output a row of VU values, filtering 2x2 rows of AYUV.
  int x;
  // See AYUVToUVRow_C: full pairs only, the odd pixel is handled below.
  for (x = 0; x < width - 1; x += 2) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    src_ayuv += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
  }
}

// Copy row of AYUV Y's into Y
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[2];  // v,u,y,a
    src_ayuv += 4;
  }
}

// Swap the two bytes of every 2-byte pair: U,V in, V,U out.
// width counts pairs, not bytes.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}

/*
 * Patch context carried over verbatim from the original page text
 * (it continues on the following line of the page):
 *
 * #ifdef __cplusplus } // extern "C" } // namespace libyuv
 * diff --git a/chromium/third_party/libyuv/source/row_gcc.cc b/chromium/third_party/libyuv/source/row_gcc.cc
 * index 8d3cb81cec2..18b6350b8ef 100644
 * --- a/chromium/third_party/libyuv/source/row_gcc.cc
 * +++ b/chromium/third_party/libyuv/source/row_gcc.cc
 * @@ -5238,7 +5238,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
 *  , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" #endif
 * - );
 * + );
 *  } #endif // HAS_ARGBMULTIPLYROW_AVX2
 * @@ -6120,24 +6120,24 @@ void I422ToYUY2Row_SSE2(const uint8_t* src_y,
 *  int width) { asm volatile(
 * - "sub %1,%2 \n"
 * + "sub %1,%2 \n"
 *  LABELALIGN
 * - "1: \n"
 * - "movq (%1),%%xmm2 \n"
 * - "movq 0x00(%1,%2,1),%%xmm1 \n"
 * - "add $0x8,%1 \n"
 * - "punpcklbw %%xmm1,%%xmm2 \n"
 * - "movdqu (%0),%%xmm0 \n"
 * - "add
 */
$0x10,%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqu %%xmm0,(%3) \n" - "movdqu %%xmm1,0x10(%3) \n" - "lea 0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "add $0x10,%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,(%3) \n" + "movdqu %%xmm1,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -6156,24 +6156,24 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y, int width) { asm volatile( - "sub %1,%2 \n" + "sub %1,%2 \n" LABELALIGN - "1: \n" - "movq (%1),%%xmm2 \n" - "movq 0x00(%1,%2,1),%%xmm1 \n" - "add $0x8,%1 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqu (%0),%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "add $0x10,%0 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - "movdqu %%xmm1,(%3) \n" - "movdqu %%xmm2,0x10(%3) \n" - "lea 0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "add $0x10,%0 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,(%3) \n" + "movdqu %%xmm2,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -6192,27 +6192,27 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y, int width) { asm volatile( - "sub %1,%2 \n" + "sub %1,%2 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%1),%%ymm1 \n" - "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" - "add $0x10,%1 \n" - "vpsllw $0x8,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm2,%%ymm2 \n" - "vmovdqu (%0),%%ymm0 \n" - "add $0x20,%0 \n" - "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" - "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" 
- "vextractf128 $0x0,%%ymm1,(%3) \n" - "vextractf128 $0x0,%%ymm2,0x10(%3) \n" - "vextractf128 $0x1,%%ymm1,0x20(%3) \n" - "vextractf128 $0x1,%%ymm2,0x30(%3) \n" - "lea 0x40(%3),%3 \n" - "sub $0x20,%4 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" + "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -6231,27 +6231,27 @@ void I422ToUYVYRow_AVX2(const uint8_t* src_y, int width) { asm volatile( - "sub %1,%2 \n" + "sub %1,%2 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%1),%%ymm1 \n" - "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" - "add $0x10,%1 \n" - "vpsllw $0x8,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm2,%%ymm2 \n" - "vmovdqu (%0),%%ymm0 \n" - "add $0x20,%0 \n" - "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" - "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" - "vextractf128 $0x0,%%ymm1,(%3) \n" - "vextractf128 $0x0,%%ymm2,0x10(%3) \n" - "vextractf128 $0x1,%%ymm1,0x20(%3) \n" - "vextractf128 $0x1,%%ymm2,0x30(%3) \n" - "lea 0x40(%3),%3 \n" - "sub $0x20,%4 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" + "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper 
\n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -6669,6 +6669,127 @@ void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, } #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 +#ifdef HAS_NV21TOYUV24ROW_AVX2 + +// begin NV21ToYUV24Row_C avx2 constants +static const ulvec8 kBLEND0 = {0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80, 0x00, + 0x80, 0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80, + 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00}; + +static const ulvec8 kBLEND1 = {0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, + 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80}; + +static const ulvec8 kBLEND2 = {0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, + 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00}; + +static const ulvec8 kSHUF0 = {0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d, + 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05, + 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d, + 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05}; + +static const ulvec8 kSHUF1 = {0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, + 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, + 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, + 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80}; + +static const ulvec8 kSHUF2 = {0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, + 0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, + 0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, + 0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f}; + +static const ulvec8 kSHUF3 = {0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80, + 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80, + 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80, + 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80}; + +static const ulvec8 kSHUF4 = {0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, + 0x80, 0x08, 0x80, 0x80, 
0x09, 0x80, 0x80, 0x0a, + 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, + 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a}; + +static const ulvec8 kSHUF5 = {0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, + 0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, + 0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, + 0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80}; + +// NV21ToYUV24Row_AVX2 +void NV21ToYUV24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width) { + uint8_t* src_y_ptr; + uint64_t src_offset = 0; + uint64_t width64; + + width64 = width; + src_y_ptr = (uint8_t*)src_y; + + asm volatile( + "vmovdqu %5, %%ymm0 \n" // init blend value + "vmovdqu %6, %%ymm1 \n" // init blend value + "vmovdqu %7, %%ymm2 \n" // init blend value + // "sub $0x20, %3 \n" //sub 32 from width for final loop + + LABELALIGN + "1: \n" // label 1 + "vmovdqu (%0,%4), %%ymm3 \n" // src_y + "vmovdqu 1(%1,%4), %%ymm4 \n" // src_uv+1 + "vmovdqu (%1), %%ymm5 \n" // src_uv + "vpshufb %8, %%ymm3, %%ymm13 \n" // y, kSHUF0 for shuf + "vpshufb %9, %%ymm4, %%ymm14 \n" // uv+1, kSHUF1 for + // shuf + "vpshufb %10, %%ymm5, %%ymm15 \n" // uv, kSHUF2 for + // shuf + "vpshufb %11, %%ymm3, %%ymm3 \n" // y kSHUF3 for shuf + "vpshufb %12, %%ymm4, %%ymm4 \n" // uv+1 kSHUF4 for + // shuf + "vpblendvb %%ymm0, %%ymm14, %%ymm13, %%ymm12 \n" // blend 0 + "vpblendvb %%ymm0, %%ymm13, %%ymm14, %%ymm14 \n" // blend 0 + "vpblendvb %%ymm2, %%ymm15, %%ymm12, %%ymm12 \n" // blend 2 + "vpblendvb %%ymm1, %%ymm15, %%ymm14, %%ymm13 \n" // blend 1 + "vpshufb %13, %%ymm5, %%ymm15 \n" // shuffle const + "vpor %%ymm4, %%ymm3, %%ymm5 \n" // get results + "vmovdqu %%ymm12, 0x20(%2) \n" // store dst_yuv+20h + "vpor %%ymm15, %%ymm5, %%ymm3 \n" // get results + "add $0x20, %4 \n" // add to src buffer + // ptr + "vinserti128 $0x1, %%xmm3, %%ymm13, %%ymm4 \n" // insert + "vperm2i128 $0x31, %%ymm13, %%ymm3, %%ymm5 \n" // insert + "vmovdqu %%ymm4, (%2) \n" // store dst_yuv + "vmovdqu %%ymm5, 0x40(%2) \n" // 
store dst_yuv+40h + "add $0x60,%2 \n" // add to dst buffer + // ptr + // "cmp %3, %4 \n" //(width64 - + // 32 bytes) and src_offset + "sub $0x20,%3 \n" // 32 pixels per loop + "jg 1b \n" + "vzeroupper \n" // sse-avx2 + // transistions + + : "+r"(src_y), //%0 + "+r"(src_vu), //%1 + "+r"(dst_yuv24), //%2 + "+r"(width64), //%3 + "+r"(src_offset) //%4 + : "m"(kBLEND0), //%5 + "m"(kBLEND1), //%6 + "m"(kBLEND2), //%7 + "m"(kSHUF0), //%8 + "m"(kSHUF1), //%9 + "m"(kSHUF2), //%10 + "m"(kSHUF3), //%11 + "m"(kSHUF4), //%12 + "m"(kSHUF5) //%13 + : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm12", + "xmm13", "xmm14", "xmm15"); +} +#endif // HAS_NV21TOYUV24ROW_AVX2 + #endif // defined(__x86_64__) || defined(__i386__) #ifdef __cplusplus diff --git a/chromium/third_party/libyuv/source/row_neon.cc b/chromium/third_party/libyuv/source/row_neon.cc index ff87e74c62c..09e1af11b9c 100644 --- a/chromium/third_party/libyuv/source/row_neon.cc +++ b/chromium/third_party/libyuv/source/row_neon.cc @@ -561,7 +561,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv, "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "q0", "q1" // Clobber List - ); + ); } // Reads 16 U's and V's and writes out 16 pairs of UV. @@ -582,7 +582,7 @@ void MergeUVRow_NEON(const uint8_t* src_u, "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "q0", "q1" // Clobber List - ); + ); } // Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. @@ -607,7 +607,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb, "+r"(width) // %4 : // Input registers : "cc", "memory", "d0", "d1", "d2" // Clobber List - ); + ); } // Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time @@ -632,7 +632,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r, "+r"(width) // %4 : // Input registers : "cc", "memory", "q0", "q1", "q2" // Clobber List - ); + ); } // Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. 
@@ -648,7 +648,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { "+r"(width) // %2 // Output registers : // Input registers : "cc", "memory", "q0", "q1" // Clobber List - ); + ); } // SetRow writes 'width' bytes using an 8 bit value repeated. @@ -761,7 +761,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); + ); } void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { @@ -778,7 +778,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); + ); } void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { @@ -795,7 +795,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3" // Clobber List - ); + ); } #define RGB565TOARGB \ @@ -826,7 +826,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); + ); } #define ARGB1555TOARGB \ @@ -872,7 +872,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); + ); } #define ARGB4444TOARGB \ @@ -901,7 +901,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2" // Clobber List - ); + ); } void ARGBToRGB24Row_NEON(const uint8_t* src_argb, @@ -919,7 +919,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb, "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); + ); } void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { @@ -935,7 +935,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); + ); } void 
YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { @@ -950,7 +950,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { "+r"(width) // %2 : : "cc", "memory", "q0", "q1" // Clobber List - ); + ); } void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { @@ -965,7 +965,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { "+r"(width) // %2 : : "cc", "memory", "q0", "q1" // Clobber List - ); + ); } void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, @@ -985,7 +985,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, "+r"(width) // %3 : : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List - ); + ); } void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, @@ -1005,7 +1005,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, "+r"(width) // %3 : : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List - ); + ); } void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, @@ -1032,7 +1032,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List - ); + ); } void UYVYToUVRow_NEON(const uint8_t* src_uyvy, @@ -1059,7 +1059,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy, : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List - ); + ); } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
@@ -1081,7 +1081,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb, "+r"(width) // %2 : "r"(shuffler) // %3 : "cc", "memory", "q0", "q1", "q2" // Clobber List - ); + ); } void I422ToYUY2Row_NEON(const uint8_t* src_y, @@ -1241,7 +1241,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); + ); } void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { @@ -2564,7 +2564,7 @@ void SobelXRow_NEON(const uint8_t* src_y0, : "r"(2), // %5 "r"(6) // %6 : "cc", "memory", "q0", "q1" // Clobber List - ); + ); } // SobelY as a matrix is @@ -2601,7 +2601,7 @@ void SobelYRow_NEON(const uint8_t* src_y0, : "r"(1), // %4 "r"(6) // %5 : "cc", "memory", "q0", "q1" // Clobber List - ); + ); } // %y passes a float as a scalar vector for vector * scalar multiply. @@ -2685,6 +2685,205 @@ void ByteToFloatRow_NEON(const uint8_t* src, : "cc", "memory", "q1", "q2", "q3"); } +// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. +void GaussCol_NEON(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width) { + asm volatile( + "vmov.u16 d6, #4 \n" // constant 4 + "vmov.u16 d7, #6 \n" // constant 6 + + "1: \n" + "vld1.16 {q1}, [%0]! \n" // load 8 samples, 5 rows + "vld1.16 {q2}, [%4]! \n" + "vaddl.u16 q0, d2, d4 \n" // * 1 + "vaddl.u16 q1, d3, d5 \n" // * 1 + "vld1.16 {q2}, [%1]! \n" + "vmlal.u16 q0, d4, d6 \n" // * 4 + "vmlal.u16 q1, d5, d6 \n" // * 4 + "vld1.16 {q2}, [%2]! \n" + "vmlal.u16 q0, d4, d7 \n" // * 6 + "vmlal.u16 q1, d5, d7 \n" // * 6 + "vld1.16 {q2}, [%3]! \n" + "vmlal.u16 q0, d4, d6 \n" // * 4 + "vmlal.u16 q1, d5, d6 \n" // * 4 + "subs %6, %6, #8 \n" // 8 processed per loop + "vst1.32 {q0, q1}, [%5]! 
\n" // store 8 samples + "bgt 1b \n" + : "+r"(src0), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(src4), // %4 + "+r"(dst), // %5 + "+r"(width) // %6 + : + : "cc", "memory", "q0", "q1", "q2", "q3"); +} + +// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. +void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) { + const uint32_t* src1 = src + 1; + const uint32_t* src2 = src + 2; + const uint32_t* src3 = src + 3; + asm volatile( + "vmov.u32 q10, #4 \n" // constant 4 + "vmov.u32 q11, #6 \n" // constant 6 + + "1: \n" + "vld1.32 {q0, q1}, [%0]! \n" // load 12 source samples + "vld1.32 {q2}, [%0] \n" + "vadd.u32 q0, q0, q1 \n" // * 1 + "vadd.u32 q1, q1, q2 \n" // * 1 + "vld1.32 {q2, q3}, [%2]! \n" + "vmla.u32 q0, q2, q11 \n" // * 6 + "vmla.u32 q1, q3, q11 \n" // * 6 + "vld1.32 {q2, q3}, [%1]! \n" + "vld1.32 {q8, q9}, [%3]! \n" + "vadd.u32 q2, q2, q8 \n" // add rows for * 4 + "vadd.u32 q3, q3, q9 \n" + "vmla.u32 q0, q2, q10 \n" // * 4 + "vmla.u32 q1, q3, q10 \n" // * 4 + "subs %5, %5, #8 \n" // 8 processed per loop + "vqshrn.u32 d0, q0, #8 \n" // round and pack + "vqshrn.u32 d1, q1, #8 \n" + "vst1.u16 {q0}, [%4]! \n" // store 8 samples + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(width) // %5 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); +} + +// Convert biplanar NV21 to packed YUV24 +void NV21ToYUV24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width) { + asm volatile( + "1: \n" + "vld1.8 {q2}, [%0]! \n" // load 16 Y values + "vld2.8 {d0, d2}, [%1]! \n" // load 8 VU values + "vmov d1, d0 \n" + "vzip.u8 d0, d1 \n" // VV + "vmov d3, d2 \n" + "vzip.u8 d2, d3 \n" // UU + "subs %3, %3, #16 \n" // 16 pixels per loop + "vst3.8 {d0, d2, d4}, [%2]! \n" // store 16 YUV pixels + "vst3.8 {d1, d3, d5}, [%2]! 
\n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_vu), // %1 + "+r"(dst_yuv24), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2"); +} + +void AYUVToUVRow_NEON(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_uv, + int width) { + asm volatile( + "add %1, %0, %1 \n" // src_stride + src_AYUV + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV + // pixels. + "vpaddl.u8 q0, q0 \n" // V 16 bytes -> 8 shorts. + "vpaddl.u8 q1, q1 \n" // U 16 bytes -> 8 shorts. + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more AYUV + // pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 AYUV + // pixels. + "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. + "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. + "vqrshrun.s16 d1, q0, #2 \n" // 2x2 average + "vqrshrun.s16 d0, q1, #2 \n" + "subs %3, %3, #16 \n" // 16 processed per loop. + "vst2.8 {d0, d1}, [%2]! \n" // store 8 pixels UV. + "bgt 1b \n" + : "+r"(src_ayuv), // %0 + "+r"(src_stride_ayuv), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); +} + +void AYUVToVURow_NEON(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_vu, + int width) { + asm volatile( + "add %1, %0, %1 \n" // src_stride + src_AYUV + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV + // pixels. + "vpaddl.u8 q0, q0 \n" // V 16 bytes -> 8 shorts. + "vpaddl.u8 q1, q1 \n" // U 16 bytes -> 8 shorts. + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more AYUV + // pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 AYUV + // pixels. + "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. + "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. + "vqrshrun.s16 d0, q0, #2 \n" // 2x2 average + "vqrshrun.s16 d1, q1, #2 \n" + "subs %3, %3, #16 \n" // 16 processed per loop. 
+ "vst2.8 {d0, d1}, [%2]! \n" // store 8 pixels VU. + "bgt 1b \n" + : "+r"(src_ayuv), // %0 + "+r"(src_stride_ayuv), // %1 + "+r"(dst_vu), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); +} + +// Copy row of AYUV Y's into Y. +// Similar to ARGBExtractAlphaRow_NEON +void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) { + asm volatile( + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV pixels + "subs %2, %2, #16 \n" // 16 processed per loop + "vst1.8 {q2}, [%1]! \n" // store 16 Y's. + "bgt 1b \n" + : "+r"(src_ayuv), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3"); +} + +// Convert biplanar UV channel of NV12 to NV21 +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) { + asm volatile( + "1: \n" + "vld2.8 {d0, d2}, [%0]! \n" // load 16 UV values + "vld2.8 {d1, d3}, [%0]! \n" + "vorr.u8 q2, q0, q0 \n" // move U after V + "subs %2, %2, #16 \n" // 16 pixels per loop + "vst2.8 {q1, q2}, [%1]! \n" // store 16 VU pixels + "bgt 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_vu), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2"); +} + #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. #ifdef __cplusplus diff --git a/chromium/third_party/libyuv/source/row_neon64.cc b/chromium/third_party/libyuv/source/row_neon64.cc index 24b4520babc..7314282b7aa 100644 --- a/chromium/third_party/libyuv/source/row_neon64.cc +++ b/chromium/third_party/libyuv/source/row_neon64.cc @@ -608,7 +608,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv, "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "v0", "v1" // Clobber List - ); + ); } // Reads 16 U's and V's and writes out 16 pairs of UV. 
@@ -629,7 +629,7 @@ void MergeUVRow_NEON(const uint8_t* src_u, "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "v0", "v1" // Clobber List - ); + ); } // Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. @@ -653,7 +653,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb, "+r"(width) // %4 : // Input registers : "cc", "memory", "v0", "v1", "v2" // Clobber List - ); + ); } // Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time @@ -677,7 +677,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r, "+r"(width) // %4 : // Input registers : "cc", "memory", "v0", "v1", "v2" // Clobber List - ); + ); } // Copy multiple of 32. @@ -693,7 +693,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { "+r"(width) // %2 // Output registers : // Input registers : "cc", "memory", "v0", "v1" // Clobber List - ); + ); } // SetRow writes 'width' bytes using an 8 bit value repeated. @@ -800,7 +800,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List - ); + ); } void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { @@ -818,7 +818,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List - ); + ); } void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { @@ -835,7 +835,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List - ); + ); } #define RGB565TOARGB \ @@ -867,7 +867,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List - ); + ); } #define ARGB1555TOARGB \ @@ -924,7 +924,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, "+r"(width) // %2 : : "cc", "memory", "v0", 
"v1", "v2", "v3" // Clobber List - ); + ); } #define ARGB4444TOARGB \ @@ -955,7 +955,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List - ); + ); } void ARGBToRGB24Row_NEON(const uint8_t* src_argb, @@ -973,7 +973,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb, "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List - ); + ); } void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { @@ -990,7 +990,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List - ); + ); } void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { @@ -1005,7 +1005,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { "+r"(width) // %2 : : "cc", "memory", "v0", "v1" // Clobber List - ); + ); } void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { @@ -1020,7 +1020,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { "+r"(width) // %2 : : "cc", "memory", "v0", "v1" // Clobber List - ); + ); } void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, @@ -1040,7 +1040,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + ); } void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, @@ -1060,7 +1060,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + ); } void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, @@ -1087,7 +1087,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List - ); + ); } void UYVYToUVRow_NEON(const uint8_t* src_uyvy, @@ -1114,7 +1114,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy, : : "cc", "memory", "v0", 
"v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List - ); + ); } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. @@ -1135,7 +1135,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb, "+r"(width) // %2 : "r"(shuffler) // %3 : "cc", "memory", "v0", "v1", "v2" // Clobber List - ); + ); } void I422ToYUY2Row_NEON(const uint8_t* src_y, @@ -1298,7 +1298,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + ); } void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { @@ -1863,7 +1863,7 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28" - ); + ); } void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { @@ -2611,7 +2611,7 @@ void SobelXRow_NEON(const uint8_t* src_y0, : "r"(2LL), // %5 "r"(6LL) // %6 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + ); } // SobelY as a matrix is @@ -2648,7 +2648,7 @@ void SobelYRow_NEON(const uint8_t* src_y0, : "r"(1LL), // %4 "r"(6LL) // %5 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + ); } // Caveat - rounds float to half float whereas scaling version truncates. 
@@ -2876,6 +2876,115 @@ void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } +// Convert biplanar NV21 to packed YUV24 +void NV21ToYUV24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width) { + asm volatile( + "1: \n" + "ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values + "ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values + "zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values + "zip1 v1.16b, v1.16b, v1.16b \n" // replicate U values + "subs %w3, %w3, #16 \n" // 16 pixels per loop + "st3 {v0.16b,v1.16b,v2.16b}, [%2], #48 \n" // store 16 YUV pixels + "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_vu), // %1 + "+r"(dst_yuv24), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2"); +} + +void AYUVToUVRow_NEON(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_uv, + int width) { + const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv; + asm volatile( + + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 ayuv + "uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts. + "uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 + "uadalp v0.8h, v4.16b \n" // V 16 bytes -> 8 shorts. + "uadalp v1.8h, v5.16b \n" // U 16 bytes -> 8 shorts. + "uqrshrn v3.8b, v0.8h, #2 \n" // 2x2 average + "uqrshrn v2.8b, v1.8h, #2 \n" + "subs %w3, %w3, #16 \n" // 16 processed per loop. + "st2 {v2.8b,v3.8b}, [%2], #16 \n" // store 8 pixels UV. 
+ "b.gt 1b \n" + : "+r"(src_ayuv), // %0 + "+r"(src_ayuv_1), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); +} + +void AYUVToVURow_NEON(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_vu, + int width) { + const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv; + asm volatile( + + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 + // pixels. + "uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts. + "uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 + "uadalp v0.8h, v4.16b \n" // V 16 bytes -> 8 shorts. + "uadalp v1.8h, v5.16b \n" // U 16 bytes -> 8 shorts. + "uqrshrn v0.8b, v0.8h, #2 \n" // 2x2 average + "uqrshrn v1.8b, v1.8h, #2 \n" + "subs %w3, %w3, #16 \n" // 16 processed per loop. + "st2 {v0.8b,v1.8b}, [%2], #16 \n" // store 8 pixels VU. + "b.gt 1b \n" + : "+r"(src_ayuv), // %0 + "+r"(src_ayuv_1), // %1 + "+r"(dst_vu), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); +} + +// Copy row of AYUV Y's into Y +void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) { + asm volatile( + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 + // pixels + "subs %w2, %w2, #16 \n" // 16 pixels per loop + "st1 {v2.16b}, [%1], #16 \n" // store 16 Y pixels + "b.gt 1b \n" + : "+r"(src_ayuv), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3"); +} + +// Convert biplanar UV channel of NV12 to NV21 +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) { + asm volatile( + "1: \n" + "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 UV values + "orr v2.16b, v0.16b, v0.16b \n" // move U after V + "subs %w2, %w2, #16 \n" // 16 pixels per loop + "st2 {v1.16b, v2.16b}, [%1], #32 \n" // store 16 VU pixels + "b.gt 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_vu), // %1 + 
"+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2"); +} + #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus diff --git a/chromium/third_party/libyuv/source/row_win.cc b/chromium/third_party/libyuv/source/row_win.cc index 5500d7f5a64..2214d272ee1 100644 --- a/chromium/third_party/libyuv/source/row_win.cc +++ b/chromium/third_party/libyuv/source/row_win.cc @@ -4222,7 +4222,7 @@ __declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, add ecx, 4 - 1 jl convertloop1b - // 1 pixel loop. + // 1 pixel loop. convertloop1: movd xmm3, [eax] // src argb lea eax, [eax + 4] @@ -5360,7 +5360,7 @@ void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, add ecx, 4 - 1 jl l1b - // 1 pixel loop + // 1 pixel loop l1: movdqu xmm0, [eax] psubd xmm0, [eax + edx * 4] @@ -5448,9 +5448,9 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row, add ecx, 4 - 1 jl l1b - // 1 pixel loop + // 1 pixel loop l1: - movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes. + movd xmm2, dword ptr [eax] // 1 argb pixel lea eax, [eax + 4] punpcklbw xmm2, xmm1 punpcklwd xmm2, xmm1 @@ -5534,7 +5534,7 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb, add ecx, 4 - 1 jl l1b - // 1 pixel loop + // 1 pixel loop l1: cvttps2dq xmm0, xmm2 // x, y float to int packssdw xmm0, xmm0 // x, y as shorts diff --git a/chromium/third_party/libyuv/source/scale.cc b/chromium/third_party/libyuv/source/scale.cc index a8db93fde48..ab085496375 100644 --- a/chromium/third_party/libyuv/source/scale.cc +++ b/chromium/third_party/libyuv/source/scale.cc @@ -1788,6 +1788,75 @@ int I420Scale_16(const uint16_t* src_y, return 0; } +// Scale an I444 image. +// This function in turn calls a scaling function for each plane. 
+ +LIBYUV_API +int I444Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering) { + if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { + return -1; + } + + ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, + dst_width, dst_height, filtering); + ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, + dst_width, dst_height, filtering); + ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, + dst_width, dst_height, filtering); + return 0; +} + +LIBYUV_API +int I444Scale_16(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering) { + if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { + return -1; + } + + ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, + dst_width, dst_height, filtering); + ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, + dst_width, dst_height, filtering); + ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, + dst_width, dst_height, filtering); + return 0; +} + // Deprecated api LIBYUV_API int Scale(const uint8_t* src_y, diff --git 
a/chromium/third_party/libyuv/source/scale_gcc.cc b/chromium/third_party/libyuv/source/scale_gcc.cc index 312236d2df8..90a49f30d73 100644 --- a/chromium/third_party/libyuv/source/scale_gcc.cc +++ b/chromium/third_party/libyuv/source/scale_gcc.cc @@ -483,7 +483,7 @@ void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, : "m"(kShuf0), // %0 "m"(kShuf1), // %1 "m"(kShuf2) // %2 - ); + ); asm volatile( LABELALIGN @@ -521,7 +521,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, : "m"(kShuf01), // %0 "m"(kShuf11), // %1 "m"(kShuf21) // %2 - ); + ); asm volatile( "movdqa %0,%%xmm5 \n" // kMadd01 "movdqa %1,%%xmm0 \n" // kMadd11 @@ -530,7 +530,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, : "m"(kMadd01), // %0 "m"(kMadd11), // %1 "m"(kRound34) // %2 - ); + ); asm volatile( LABELALIGN @@ -587,7 +587,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, : "m"(kShuf01), // %0 "m"(kShuf11), // %1 "m"(kShuf21) // %2 - ); + ); asm volatile( "movdqa %0,%%xmm5 \n" // kMadd01 "movdqa %1,%%xmm0 \n" // kMadd11 @@ -596,7 +596,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, : "m"(kMadd01), // %0 "m"(kMadd11), // %1 "m"(kRound34) // %2 - ); + ); asm volatile( @@ -690,7 +690,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, "m"(kShufAb1), // %1 "m"(kShufAb2), // %2 "m"(kScaleAb2) // %3 - ); + ); asm volatile( LABELALIGN @@ -734,7 +734,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, : "m"(kShufAc), // %0 "m"(kShufAc3), // %1 "m"(kScaleAc33) // %2 - ); + ); asm volatile( LABELALIGN @@ -1272,7 +1272,7 @@ void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, : : "m"(kShuffleColARGB), // %0 "m"(kShuffleFractions) // %1 - ); + ); asm volatile( "movd %5,%%xmm2 \n" diff --git a/chromium/third_party/libyuv/source/scale_mmi.cc b/chromium/third_party/libyuv/source/scale_mmi.cc index e12c6bb79ba..990463c2a60 100644 --- a/chromium/third_party/libyuv/source/scale_mmi.cc +++ b/chromium/third_party/libyuv/source/scale_mmi.cc @@ -26,6 +26,8 @@ 
extern "C" { // This module is for Mips MMI. #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) +// clang-format off + // CPU agnostic row functions void ScaleRowDown2_MMI(const uint8_t* src_ptr, ptrdiff_t src_stride, @@ -1101,6 +1103,8 @@ void ScaleRowUp2_16_MMI(const uint16_t* src_ptr, : "memory"); } +// clang-format on + #endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) #ifdef __cplusplus diff --git a/chromium/third_party/libyuv/source/scale_neon.cc b/chromium/third_party/libyuv/source/scale_neon.cc index 46f5ba4cd4f..366b155ba4e 100644 --- a/chromium/third_party/libyuv/source/scale_neon.cc +++ b/chromium/third_party/libyuv/source/scale_neon.cc @@ -40,7 +40,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %2 : : "q0", "q1" // Clobber List - ); + ); } // Read 32x1 average down and write 16x1. @@ -61,7 +61,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %2 : : "q0", "q1" // Clobber List - ); + ); } // Read 32x2 average down and write 16x1. @@ -92,7 +92,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %3 : : "q0", "q1", "q2", "q3" // Clobber List - ); + ); } void ScaleRowDown4_NEON(const uint8_t* src_ptr, @@ -523,7 +523,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr, "+r"(src_width) // %2 : : "memory", "cc", "q0", "q1", "q2" // Clobber List - ); + ); } // TODO(Yang Zhang): Investigate less load instructions for @@ -705,7 +705,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %2 : : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List - ); + ); } // 46: f964 018d vld4.32 {d16,d18,d20,d22}, [r4]! 
@@ -734,7 +734,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, "+r"(dst_width) // %2 : : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List - ); + ); } void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, diff --git a/chromium/third_party/libyuv/source/scale_neon64.cc b/chromium/third_party/libyuv/source/scale_neon64.cc index f4aed5fc92f..0a7b80ce1d4 100644 --- a/chromium/third_party/libyuv/source/scale_neon64.cc +++ b/chromium/third_party/libyuv/source/scale_neon64.cc @@ -38,7 +38,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %2 : : "v0", "v1" // Clobber List - ); + ); } // Read 32x1 average down and write 16x1. @@ -60,7 +60,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %2 : : "v0", "v1" // Clobber List - ); + ); } // Read 32x2 average down and write 16x1. @@ -89,7 +89,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %3 : : "v0", "v1", "v2", "v3" // Clobber List - ); + ); } void ScaleRowDown4_NEON(const uint8_t* src_ptr, @@ -534,7 +534,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr, "+r"(src_width) // %2 : : "memory", "cc", "v0", "v1", "v2" // Clobber List - ); + ); } // TODO(Yang Zhang): Investigate less load instructions for @@ -719,7 +719,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, "+r"(dst_width) // %2 : : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List - ); + ); } void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, @@ -742,7 +742,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, "+r"(dst_width) // %2 : : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List - ); + ); } void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, @@ -991,7 +991,7 @@ void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, "+r"(dst_width) // %3 : : "v0", "v1", "v2", "v3" // Clobber List - ); + ); } // Read 8x2 upsample with filtering and write 16x1. 
@@ -1041,7 +1041,7 @@ void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, "r"(14LL) // %5 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19" // Clobber List - ); + ); } #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) diff --git a/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py b/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py index ea8f69f1882..8359d30965b 100755 --- a/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py +++ b/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py @@ -298,9 +298,6 @@ def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos, commit_msg.append('Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval)) commit_msg.append('Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE % rev_interval)) - # TBR field will be empty unless in some custom cases, where some engineers - # are added. - tbr_authors = '' if changed_deps_list: commit_msg.append('Changed dependencies:') @@ -322,7 +319,11 @@ def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos, else: commit_msg.append('No update to Clang.\n') - commit_msg.append('TBR=%s' % tbr_authors) + # TBR needs to be non-empty for Gerrit to process it. 
+ git_author = _RunCommand(['git', 'config', 'user.email'], + working_dir=CHECKOUT_SRC_DIR)[0].strip() + commit_msg.append('TBR=%s' % git_author) + commit_msg.append('BUG=None') return '\n'.join(commit_msg) diff --git a/chromium/third_party/libyuv/unit_test/convert_test.cc b/chromium/third_party/libyuv/unit_test/convert_test.cc index d97b4fc723c..31173779e2b 100644 --- a/chromium/third_party/libyuv/unit_test/convert_test.cc +++ b/chromium/third_party/libyuv/unit_test/convert_test.cc @@ -311,10 +311,10 @@ int I400ToNV21(const uint8_t* src_y, SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ OFF); \ align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kWidth; ++j) \ @@ -329,21 +329,21 @@ int I400ToNV21(const uint8_t* src_y, } \ memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_uv_c, 2, \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_uv_opt, 102, \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \ - dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \ + dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ MaskCpuFlags(benchmark_cpu_info_); \ for (int i = 0; 
i < benchmark_iterations_; ++i) { \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \ - dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \ + dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ } \ int max_diff = 0; \ for (int i = 0; i < kHeight; ++i) { \ @@ -357,12 +357,12 @@ int I400ToNV21(const uint8_t* src_y, } \ EXPECT_LE(max_diff, 1); \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ int abs_diff = \ abs(static_cast<int>( \ - dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \ + dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ static_cast<int>( \ - dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \ + dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ if (abs_diff > max_diff) { \ max_diff = abs_diff; \ } \ @@ -395,6 +395,99 @@ TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2) TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2) TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2) +#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \ + OFF) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 0 + OFF] = \ + (fastrand() & 0xff); \ + src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 1 + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_uv + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_uv + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = 
abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ + static_cast<int>(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ + int abs_diff = \ + abs(static_cast<int>( \ + dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ + static_cast<int>( \ + dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width, _Unaligned, +, 1) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) + +// TODO(fbarchard): Fix msan on this unittest +// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2) + #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \ DOY) \ @@ -680,8 +773,8 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) 
TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2) TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - W1280, DIFF, N, NEG, OFF) \ +#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \ + BPP_B, W1280, DIFF, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kHeight = benchmark_height_; \ @@ -716,9 +809,9 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ - FMT_B##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ + FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ kHeight); \ - FMT_B##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ + FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ kHeight); \ int max_diff = 0; \ for (int i = 0; i < kHeight; ++i) { \ @@ -740,25 +833,27 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) free_aligned_buffer_page_end(dst_argb32_opt); \ } -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ +#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + DIFF) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ - 4, DIFF, _Any, +, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + 
benchmark_width_, DIFF, _Unaligned, +, 1) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, DIFF, _Invert, -, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ benchmark_width_, DIFF, _Opt, +, 0) -TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2) -TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2) -TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2) -TESTBIPLANARTOB(NV12, 2, 2, RAW, 3, 2) -TESTBIPLANARTOB(NV21, 2, 2, RAW, 3, 2) -TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) +TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2) +TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2) +TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2) +TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2) +TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9) +TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2) #ifdef DO_THREE_PLANES // Do 3 allocations for yuv. conventional but slower. 
@@ -885,26 +980,26 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, DIFF, _Opt, +, 0) +TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) +TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR) TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR) +TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) +TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) +TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) +TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 4) TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) -// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9. -TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) -TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) -TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) -TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) -TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) -TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2) -TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) -TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) #define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \ SUBSAMP_Y, W1280, N, NEG, OFF) \ @@ -978,6 +1073,8 @@ TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) +TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2) +TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) #define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ 
HEIGHT_B, W1280, DIFF, N, NEG, OFF) \ @@ -1069,45 +1166,46 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) HEIGHT_B, DIFF) // TODO(fbarchard): make ARM version of C code that matches NEON. +TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) +TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) -TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) +TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) -TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) -TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) +TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) +TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) +TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) +TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) +TESTATOB(J400, 1, 1, 
1, ARGB, 4, 4, 1, 0) +TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0) TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0) TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0) -TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) +TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) +TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) -TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) #define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, DIFF, N, NEG, OFF) \ @@ -1291,6 +1389,7 @@ TEST_F(LibYUVConvertTest, ValidateJpeg) { // EOI, SOI. Expect pass. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; orig_pixels[kSize - kOff + 0] = 0xff; orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. for (int times = 0; times < benchmark_iterations_; ++times) { @@ -1317,6 +1416,7 @@ TEST_F(LibYUVConvertTest, ValidateJpegLarge) { // EOI, SOI. Expect pass. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; orig_pixels[kSize - kOff + 0] = 0xff; orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. for (int times = 0; times < benchmark_iterations_; ++times) { @@ -1350,6 +1450,7 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) { // SOI but no EOI. Expect fail. 
orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; for (int times = 0; times < benchmark_iterations_; ++times) { EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); } @@ -1367,22 +1468,24 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) { TEST_F(LibYUVConvertTest, FuzzJpeg) { // SOI but no EOI. Expect fail. for (int times = 0; times < benchmark_iterations_; ++times) { - const int kSize = fastrand() % 5000 + 2; + const int kSize = fastrand() % 5000 + 3; align_buffer_page_end(orig_pixels, kSize); MemRandomize(orig_pixels, kSize); // Add SOI so frame will be scanned. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; orig_pixels[kSize - 1] = 0xff; - ValidateJpeg(orig_pixels, kSize); // Failure normally expected. + ValidateJpeg(orig_pixels, + kSize); // Failure normally expected. free_aligned_buffer_page_end(orig_pixels); } } -// Test data created in GIMP. In export jpeg, disable thumbnails etc, -// choose a subsampling, and use low quality (50) to keep size small. -// Generated with xxd -i test.jpg +// Test data created in GIMP. In export jpeg, disable +// thumbnails etc, choose a subsampling, and use low quality +// (50) to keep size small. Generated with xxd -i test.jpg // test 0 is J400 static const uint8_t kTest0Jpg[] = { 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, @@ -1984,8 +2087,8 @@ TEST_F(LibYUVConvertTest, TestMJPGInfo) { EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); - EXPECT_EQ(1, - ShowJPegInfo(kTest4Jpg, kTest4JpgLen)); // Valid but unsupported. + EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, + kTest4JpgLen)); // Valid but unsupported. } #endif // HAVE_JPEG @@ -2903,7 +3006,8 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) { } // Test 10 bit YUV to 10 bit RGB -// Caveat: Result is near due to float rounding in expected result. 
+// Caveat: Result is near due to float rounding in expected +// result. TEST_F(LibYUVConvertTest, TestH010ToAR30) { const int kSize = 1024; int histogram_b[1024]; @@ -2966,7 +3070,8 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) { } // Test 10 bit YUV to 10 bit RGB -// Caveat: Result is near due to float rounding in expected result. +// Caveat: Result is near due to float rounding in expected +// result. TEST_F(LibYUVConvertTest, TestH010ToAB30) { const int kSize = 1024; int histogram_b[1024]; diff --git a/chromium/third_party/libyuv/unit_test/planar_test.cc b/chromium/third_party/libyuv/unit_test/planar_test.cc index 756089558f7..22e48abb227 100644 --- a/chromium/third_party/libyuv/unit_test/planar_test.cc +++ b/chromium/third_party/libyuv/unit_test/planar_test.cc @@ -3186,7 +3186,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { } GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640); for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) int has_neon = TestCpuFlag(kCpuHasNEON); if (has_neon) { GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640); @@ -3239,7 +3240,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0], 640); for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) int has_neon = TestCpuFlag(kCpuHasNEON); if (has_neon) { GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], @@ -3267,4 +3269,23 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704)); } +TEST_F(LibYUVPlanarTest, SwapUVRow) { + const int kPixels = benchmark_width_ * benchmark_height_; + 
align_buffer_page_end(src_pixels_vu, kPixels * 2); + align_buffer_page_end(dst_pixels_uv, kPixels * 2); + + MemRandomize(src_pixels_vu, kPixels * 2); + memset(dst_pixels_uv, 1, kPixels * 2); + + SwapUVRow_C(src_pixels_vu, dst_pixels_uv, kPixels); + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); + EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); + } + + free_aligned_buffer_page_end(src_pixels_vu); + free_aligned_buffer_page_end(dst_pixels_uv); +} + } // namespace libyuv diff --git a/chromium/third_party/libyuv/unit_test/rotate_test.cc b/chromium/third_party/libyuv/unit_test/rotate_test.cc index d04b96e9c68..61941e63e0e 100644 --- a/chromium/third_party/libyuv/unit_test/rotate_test.cc +++ b/chromium/third_party/libyuv/unit_test/rotate_test.cc @@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) { benchmark_cpu_info_); } +static void I444TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i444_y_size = src_width * Abs(src_height); + int src_i444_uv_size = src_width * Abs(src_height); + int src_i444_size = src_i444_y_size + src_i444_uv_size * 2; + align_buffer_page_end(src_i444, src_i444_size); + for (int i = 0; i < src_i444_size; ++i) { + src_i444[i] = fastrand() & 0xff; + } + + int dst_i444_y_size = dst_width * dst_height; + int dst_i444_uv_size = dst_width * dst_height; + int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2; + align_buffer_page_end(dst_i444_c, dst_i444_size); + align_buffer_page_end(dst_i444_opt, dst_i444_size); + memset(dst_i444_c, 2, dst_i444_size); + memset(dst_i444_opt, 3, dst_i444_size); + + MaskCpuFlags(disable_cpu_flags); // 
Disable all CPU optimization. + I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width, + dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size, + dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i444_size; ++i) { + EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]); + } + + free_aligned_buffer_page_end(dst_i444_c); + free_aligned_buffer_page_end(dst_i444_opt); + free_aligned_buffer_page_end(src_i444); +} + +TEST_F(LibYUVRotateTest, I444Rotate0_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate90_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate180_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate270_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +// TODO(fbarchard): Remove odd width tests. 
+// Odd width tests work but disabled because they use C code and can be +// tested by passing an odd width command line or environment variable. +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + static void NV12TestRotate(int src_width, int src_height, int dst_width, diff --git a/chromium/third_party/libyuv/unit_test/scale_test.cc b/chromium/third_party/libyuv/unit_test/scale_test.cc index d97d54a8830..811b2d0494b 100644 --- a/chromium/third_party/libyuv/unit_test/scale_test.cc +++ b/chromium/third_party/libyuv/unit_test/scale_test.cc @@ -22,14 +22,14 @@ namespace libyuv { // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
-static int TestFilter(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { +static int I420TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { if (!SizeValid(src_width, src_height, dst_width, dst_height)) { return 0; } @@ -141,14 +141,14 @@ static int TestFilter(int src_width, // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. // 0 = exact. -static int TestFilter_16(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { +static int I420TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { if (!SizeValid(src_width, src_height, dst_width, dst_height)) { return 0; } @@ -256,6 +256,241 @@ static int TestFilter_16(int src_width, return max_diff; } +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
+static int I444TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + if (!src_y || !src_u || !src_v) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_u_c, dst_uv_plane_size); + align_buffer_page_end(dst_v_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_u_opt, dst_uv_plane_size); + align_buffer_page_end(dst_v_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt || + !dst_v_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
+ double c_time = get_time(); + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_c, dst_stride_y, dst_u_c, + dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f); + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt, + dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height, + f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + // Report performance of C vs OPT. + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference is not + // over 3. 
+ int max_diff = 0; + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv); ++j) { + int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - + dst_u_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] - + dst_v_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + + return max_diff; +} + +// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. +// 0 = exact. 
+static int I444TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(src_u_16, src_uv_plane_size * 2); + align_buffer_page_end(src_v_16, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16); + uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i]; + } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_16[i] = src_u[i]; + p_src_v_16[i] = src_v[i]; + } + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + 
align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2); + + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16); + uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (i = 0; i < benchmark_iterations; ++i) { + I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv, + p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16, + dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. 
+ int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(dst_u_16); + free_aligned_buffer_page_end(dst_v_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_16); + free_aligned_buffer_page_end(src_u_16); + free_aligned_buffer_page_end(src_v_16); + + return max_diff; +} + // The following adjustments in dimensions ensure the scale factor will be // exactly achieved. // 2 is chroma subsample. 
@@ -263,16 +498,32 @@ static int TestFilter_16(int src_width, #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) #define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ - TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \ - int diff = TestFilter( \ + TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \ + int diff = I420TestFilter( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \ + int diff = I444TestFilter( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \ - int diff = TestFilter_16( \ + TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \ + int diff = I420TestFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \ + int diff = I444TestFilter_16( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ @@ -300,30 +551,58 @@ TEST_FACTOR(3, 1, 3, 0) #undef DX #define TEST_SCALETO1(name, width, height, filter, max_diff) \ - TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ - int diff = 
TestFilter(benchmark_width_, benchmark_height_, width, height, \ - kFilter##filter, benchmark_iterations_, \ - disable_cpu_flags_, benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \ + int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \ + int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \ + int diff = I420TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ - int diff = TestFilter(width, height, Abs(benchmark_width_), \ - Abs(benchmark_height_), kFilter##filter, \ - benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \ + int diff = I444TestFilter(width, height, 
Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \ - int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \ - height, kFilter##filter, benchmark_iterations_, \ - disable_cpu_flags_, benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, \ + I420##name##From##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \ - int diff = TestFilter_16(width, height, Abs(benchmark_width_), \ - Abs(benchmark_height_), kFilter##filter, \ - benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, \ + I444##name##From##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } diff --git a/chromium/third_party/libyuv/unit_test/unit_test.cc b/chromium/third_party/libyuv/unit_test/unit_test.cc index a1ae7ea3c7c..2aa9cdaad6e 100644 --- a/chromium/third_party/libyuv/unit_test/unit_test.cc +++ b/chromium/third_party/libyuv/unit_test/unit_test.cc @@ -17,6 +17,9 @@ #ifdef LIBYUV_USE_GFLAGS #include "gflags/gflags.h" #endif +#ifdef LIBYUV_USE_BASE_FLAGS +#include "base/commandlineflags.h" +#endif #include "libyuv/cpu_id.h" unsigned int fastrand_seed = 0xfb; diff --git a/chromium/third_party/libyuv/util/psnr.cc b/chromium/third_party/libyuv/util/psnr.cc index f54015bab82..c7bee7f97d2 100644 --- a/chromium/third_party/libyuv/util/psnr.cc +++ 
b/chromium/third_party/libyuv/util/psnr.cc @@ -189,7 +189,7 @@ static uint32_t SumSquareError_SSE2(const uint8_t* src_a, , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" #endif - ); // NOLINT + ); // NOLINT return sse; } #endif // LIBYUV_DISABLE_X86 etc |