From da51f56cc21233c2d30f0fe0d171727c3102b2e0 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 31 Jan 2018 16:33:43 +0100 Subject: BASELINE: Update Chromium to 65.0.3525.40 Also imports missing submodules Change-Id: I36901b7c6a325cda3d2c10cedb2186c25af3b79b Reviewed-by: Alexandru Croitor --- chromium/third_party/libyuv/Android.bp | 3 - chromium/third_party/libyuv/Android.mk | 3 - chromium/third_party/libyuv/BUILD.gn | 16 +- chromium/third_party/libyuv/DEPS | 100 +- chromium/third_party/libyuv/README.chromium | 2 +- chromium/third_party/libyuv/README.md | 8 +- .../libyuv/docs/environment_variables.md | 2 +- chromium/third_party/libyuv/docs/formats.md | 35 +- .../third_party/libyuv/docs/getting_started.md | 11 +- .../third_party/libyuv/include/libyuv/convert.h | 37 + .../libyuv/include/libyuv/convert_argb.h | 149 ++ .../libyuv/include/libyuv/convert_from.h | 29 +- .../libyuv/include/libyuv/convert_from_argb.h | 9 + .../third_party/libyuv/include/libyuv/cpu_id.h | 3 +- .../libyuv/include/libyuv/planar_functions.h | 18 + .../third_party/libyuv/include/libyuv/rotate_row.h | 35 - chromium/third_party/libyuv/include/libyuv/row.h | 294 +--- .../third_party/libyuv/include/libyuv/scale_row.h | 55 - .../third_party/libyuv/include/libyuv/version.h | 2 +- .../libyuv/include/libyuv/video_common.h | 26 +- chromium/third_party/libyuv/libyuv.gyp | 1 - chromium/third_party/libyuv/libyuv.gypi | 3 - chromium/third_party/libyuv/libyuv_test.gyp | 1 - chromium/third_party/libyuv/linux.mk | 3 - chromium/third_party/libyuv/source/compare_gcc.cc | 16 +- chromium/third_party/libyuv/source/convert.cc | 165 +- chromium/third_party/libyuv/source/convert_argb.cc | 556 +++++-- chromium/third_party/libyuv/source/convert_from.cc | 198 ++- .../third_party/libyuv/source/convert_from_argb.cc | 149 +- chromium/third_party/libyuv/source/cpu_id.cc | 10 +- .../third_party/libyuv/source/planar_functions.cc | 154 +- chromium/third_party/libyuv/source/rotate.cc | 35 - chromium/third_party/libyuv/source/rotate_any.cc | 6 - chromium/third_party/libyuv/source/rotate_argb.cc | 5 - chromium/third_party/libyuv/source/rotate_dspr2.cc | 475 ------ chromium/third_party/libyuv/source/rotate_neon.cc | 8 +- .../third_party/libyuv/source/rotate_neon64.cc | 6 +- chromium/third_party/libyuv/source/row_any.cc | 129 +- chromium/third_party/libyuv/source/row_common.cc | 135 ++ chromium/third_party/libyuv/source/row_dspr2.cc | 1721 -------------------- chromium/third_party/libyuv/source/row_gcc.cc | 667 +++++--- chromium/third_party/libyuv/source/row_win.cc | 117 -- chromium/third_party/libyuv/source/scale.cc | 122 -- chromium/third_party/libyuv/source/scale_any.cc | 3 - chromium/third_party/libyuv/source/scale_argb.cc | 30 - chromium/third_party/libyuv/source/scale_common.cc | 20 - chromium/third_party/libyuv/source/scale_dspr2.cc | 668 -------- chromium/third_party/libyuv/source/scale_gcc.cc | 77 +- chromium/third_party/libyuv/source/scale_neon.cc | 17 +- chromium/third_party/libyuv/source/scale_neon64.cc | 17 +- chromium/third_party/libyuv/source/scale_win.cc | 77 +- .../libyuv/tools_libyuv/autoroller/roll_deps.py | 8 +- .../third_party/libyuv/unit_test/compare_test.cc | 2 +- .../third_party/libyuv/unit_test/convert_test.cc | 453 ++++-- chromium/third_party/libyuv/unit_test/cpu_test.cc | 4 +- .../third_party/libyuv/unit_test/planar_test.cc | 156 +- chromium/third_party/libyuv/unit_test/unit_test.cc | 60 +- .../libyuv/unit_test/video_common_test.cc | 3 + chromium/third_party/libyuv/util/Makefile | 3 + 
chromium/third_party/libyuv/util/cpuid.c | 4 +- 60 files changed, 2531 insertions(+), 4590 deletions(-) delete mode 100644 chromium/third_party/libyuv/source/rotate_dspr2.cc delete mode 100644 chromium/third_party/libyuv/source/row_dspr2.cc delete mode 100644 chromium/third_party/libyuv/source/scale_dspr2.cc (limited to 'chromium/third_party/libyuv') diff --git a/chromium/third_party/libyuv/Android.bp b/chromium/third_party/libyuv/Android.bp index a3d8d834ac7..7d95a7865ab 100644 --- a/chromium/third_party/libyuv/Android.bp +++ b/chromium/third_party/libyuv/Android.bp @@ -24,14 +24,12 @@ cc_library { "source/rotate_any.cc", "source/rotate_argb.cc", "source/rotate_common.cc", - "source/rotate_dspr2.cc", "source/rotate_gcc.cc", "source/rotate_msa.cc", "source/rotate_neon.cc", "source/rotate_neon64.cc", "source/row_any.cc", "source/row_common.cc", - "source/row_dspr2.cc", "source/row_gcc.cc", "source/row_msa.cc", "source/row_neon.cc", @@ -40,7 +38,6 @@ cc_library { "source/scale_any.cc", "source/scale_argb.cc", "source/scale_common.cc", - "source/scale_dspr2.cc", "source/scale_gcc.cc", "source/scale_msa.cc", "source/scale_neon.cc", diff --git a/chromium/third_party/libyuv/Android.mk b/chromium/third_party/libyuv/Android.mk index 854020610ea..dbc6cad37ab 100644 --- a/chromium/third_party/libyuv/Android.mk +++ b/chromium/third_party/libyuv/Android.mk @@ -24,14 +24,12 @@ LOCAL_SRC_FILES := \ source/rotate_any.cc \ source/rotate_argb.cc \ source/rotate_common.cc \ - source/rotate_dspr2.cc \ source/rotate_gcc.cc \ source/rotate_msa.cc \ source/rotate_neon.cc \ source/rotate_neon64.cc \ source/row_any.cc \ source/row_common.cc \ - source/row_dspr2.cc \ source/row_gcc.cc \ source/row_msa.cc \ source/row_neon.cc \ @@ -40,7 +38,6 @@ LOCAL_SRC_FILES := \ source/scale_any.cc \ source/scale_argb.cc \ source/scale_common.cc \ - source/scale_dspr2.cc \ source/scale_gcc.cc \ source/scale_msa.cc \ source/scale_neon.cc \ diff --git a/chromium/third_party/libyuv/BUILD.gn b/chromium/third_party/libyuv/BUILD.gn index 9badf08c846..10b5b819a9c 100644 --- a/chromium/third_party/libyuv/BUILD.gn +++ b/chromium/third_party/libyuv/BUILD.gn @@ -12,6 +12,11 @@ import("//testing/test.gni") declare_args() { # Set to false to disable building with gflags. libyuv_use_gflags = true + + # When building a shared library using a target in WebRTC or + # Chromium projects that depends on libyuv, setting this flag + # to true makes libyuv symbols visible inside that library. 
+ libyuv_symbols_visible = false } config("libyuv_config") { @@ -42,7 +47,7 @@ group("default") { } group("libyuv") { - public_configs = [ ":libyuv_config" ] + all_dependent_configs = [ ":libyuv_config" ] if (is_win && target_cpu == "x64") { # Compile with clang in order to get inline assembly @@ -110,19 +115,16 @@ static_library("libyuv_internal") { "source/rotate_any.cc", "source/rotate_argb.cc", "source/rotate_common.cc", - "source/rotate_dspr2.cc", "source/rotate_gcc.cc", "source/rotate_win.cc", "source/row_any.cc", "source/row_common.cc", - "source/row_dspr2.cc", "source/row_gcc.cc", "source/row_win.cc", "source/scale.cc", "source/scale_any.cc", "source/scale_argb.cc", "source/scale_common.cc", - "source/scale_dspr2.cc", "source/scale_gcc.cc", "source/scale_win.cc", "source/video_common.cc", @@ -132,6 +134,11 @@ static_library("libyuv_internal") { defines = [] deps = [] + if (libyuv_symbols_visible) { + configs -= [ "//build/config/gcc:symbol_visibility_hidden" ] + configs += [ "//build/config/gcc:symbol_visibility_default" ] + } + if (!is_ios) { defines += [ "HAVE_JPEG" ] @@ -302,7 +309,6 @@ if (libyuv_include_tests) { # Enable the following 3 macros to turn off assembly for specified CPU. # "LIBYUV_DISABLE_X86", # "LIBYUV_DISABLE_NEON", - # "LIBYUV_DISABLE_DSPR2", # Enable the following macro to build libyuv as a shared library (dll). # "LIBYUV_USING_SHARED_LIBRARY" ] diff --git a/chromium/third_party/libyuv/DEPS b/chromium/third_party/libyuv/DEPS index fdb133c7ac1..688ff1299c2 100644 --- a/chromium/third_party/libyuv/DEPS +++ b/chromium/third_party/libyuv/DEPS @@ -5,7 +5,7 @@ vars = { # Three lines of non-changing comments so that # the commit queue can handle CLs rolling lss # and whatever else without interference from each other. - 'lss_revision': '63f24c8221a229f677d26ebe8f3d1528a9d787ac', + 'lss_revision': 'e6527b0cd469e3ff5764785dadcb39bf7d787154', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling catapult # and whatever else without interference from each other. 
@@ -22,7 +22,7 @@ deps = { 'src/third_party': Var('chromium_git') + '/chromium/src/third_party' + '@' + '72c52c224cdd3c377f7caff8ffed0f5749e79549', 'src/third_party/catapult': - Var('chromium_git') + '/external/github.com/catapult-project/catapult.git' + '@' + Var('catapult_revision'), + Var('chromium_git') + '/catapult.git' + '@' + Var('catapult_revision'), 'src/third_party/colorama/src': Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8', 'src/third_party/googletest/src': @@ -45,45 +45,65 @@ deps = { Var('chromium_git') + '/external/github.com/gflags/gflags' + '@' + '03bebcb065c83beff83d50ae025a55a4bf94dfca', 'src/third_party/gtest-parallel': Var('chromium_git') + '/external/webrtc/deps/third_party/gtest-parallel' + '@' + '1dad0e9f6d82ff994130b529d7d814b40eb32b0e', -} -deps_os = { - 'android': { - 'src/base': - Var('chromium_git') + '/chromium/src/base' + '@' + '9b543d487c7c38be191c6180001ff9ce186ae326', - 'src/third_party/android_tools': - Var('chromium_git') + '/android_tools.git' + '@' + 'aadb2fed04af8606545b0afe4e3060bc1a15fad7', - 'src/third_party/ced/src': - Var('chromium_git') + '/external/github.com/google/compact_enc_det.git' + '@' + '94c367a1fe3a13207f4b22604fcfd1d9f9ddf6d9', - 'src/third_party/icu': - Var('chromium_git') + '/chromium/deps/icu.git' + '@' + '08cb956852a5ccdba7f9c941728bb833529ba3c6', - 'src/third_party/jsr-305/src': - Var('chromium_git') + '/external/jsr-305.git' + '@' + '642c508235471f7220af6d5df2d3210e3bfc0919', - 'src/third_party/junit/src': - Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481', - 'src/third_party/lss': - Var('chromium_git') + '/linux-syscall-support.git' + '@' + Var('lss_revision'), - 'src/third_party/mockito/src': - Var('chromium_git') + '/external/mockito/mockito.git' + '@' + 'de83ad4598ad4cf5ea53c69a8a8053780b04b850', - 'src/third_party/requests/src': - Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'f172b30356d821d180fa4ecfa3e71c7274a32de4', - 'src/third_party/robolectric/robolectric': - Var('chromium_git') + '/external/robolectric.git' + '@' + 'b02c65cc6d7465f58f0de48a39914aa905692afa', - 'src/third_party/ub-uiautomator/lib': - Var('chromium_git') + '/chromium/third_party/ub-uiautomator.git' + '@' + '00270549ce3161ae72ceb24712618ea28b4f9434', - }, - 'ios': { - 'src/ios': - Var('chromium_git') + '/chromium/src/ios' + '@' + '39c4b2fcf73f5b1e82af3b9c57267c17217d6a30', - }, - 'unix': { - 'src/third_party/lss': - Var('chromium_git') + '/linux-syscall-support.git' + '@' + Var('lss_revision'), - }, - 'win': { - # Dependencies used by libjpeg-turbo - 'src/third_party/yasm/binaries': - Var('chromium_git') + '/chromium/deps/yasm/binaries.git' + '@' + '52f9b3f4b0aa06da24ef8b123058bb61ee468881', + 'src/third_party/lss': { + 'url': Var('chromium_git') + '/linux-syscall-support.git' + '@' + Var('lss_revision'), + 'condition': 'checkout_android or checkout_linux', + }, + + # Android deps: + 'src/base': { + 'url': Var('chromium_git') + '/chromium/src/base' + '@' + '9b543d487c7c38be191c6180001ff9ce186ae326', + 'condition': 'checkout_android', + }, + 'src/third_party/android_tools': { + 'url': Var('chromium_git') + '/android_tools.git' + '@' + 'aadb2fed04af8606545b0afe4e3060bc1a15fad7', + 'condition': 'checkout_android', + }, + 'src/third_party/ced/src': { + 'url': Var('chromium_git') + '/external/github.com/google/compact_enc_det.git' + '@' + '94c367a1fe3a13207f4b22604fcfd1d9f9ddf6d9', + 'condition': 'checkout_android', + }, + 
'src/third_party/icu': { + 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + '08cb956852a5ccdba7f9c941728bb833529ba3c6', + 'condition': 'checkout_android', + }, + 'src/third_party/jsr-305/src': { + 'url': Var('chromium_git') + '/external/jsr-305.git' + '@' + '642c508235471f7220af6d5df2d3210e3bfc0919', + 'condition': 'checkout_android', + }, + 'src/third_party/junit/src': { + 'url': Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481', + 'condition': 'checkout_android', + }, + 'src/third_party/mockito/src': { + 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + 'de83ad4598ad4cf5ea53c69a8a8053780b04b850', + 'condition': 'checkout_android', + }, + 'src/third_party/requests/src': { + 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'f172b30356d821d180fa4ecfa3e71c7274a32de4', + 'condition': 'checkout_android', + }, + 'src/third_party/robolectric/robolectric': { + 'url': Var('chromium_git') + '/external/robolectric.git' + '@' + 'b02c65cc6d7465f58f0de48a39914aa905692afa', + 'condition': 'checkout_android', + }, + 'src/third_party/ub-uiautomator/lib': { + 'url': Var('chromium_git') + '/chromium/third_party/ub-uiautomator.git' + '@' + '00270549ce3161ae72ceb24712618ea28b4f9434', + 'condition': 'checkout_android', + }, + + # iOS deps: + 'src/ios': { + 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '39c4b2fcf73f5b1e82af3b9c57267c17217d6a30', + 'condition': 'checkout_ios' + }, + + # Win deps: + # Dependencies used by libjpeg-turbo + 'src/third_party/yasm/binaries': { + 'url': Var('chromium_git') + '/chromium/deps/yasm/binaries.git' + '@' + '52f9b3f4b0aa06da24ef8b123058bb61ee468881', + 'condition': 'checkout_win', }, } diff --git a/chromium/third_party/libyuv/README.chromium b/chromium/third_party/libyuv/README.chromium index 88c069734ac..373e5f75c87 100644 --- a/chromium/third_party/libyuv/README.chromium +++ b/chromium/third_party/libyuv/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1678 +Version: 1688 License: BSD License File: LICENSE diff --git a/chromium/third_party/libyuv/README.md b/chromium/third_party/libyuv/README.md index b59b71c5d2b..7b6619220b8 100644 --- a/chromium/third_party/libyuv/README.md +++ b/chromium/third_party/libyuv/README.md @@ -1,12 +1,12 @@ **libyuv** is an open source project that includes YUV scaling and conversion functionality. * Scale YUV to prepare content for compression, with point, bilinear or box filter. -* Convert to YUV from webcam formats. -* Convert from YUV to formats for rendering/effects. +* Convert to YUV from webcam formats for compression. +* Convert to RGB formats for rendering/effects. * Rotate by 90/180/270 degrees to adjust for mobile devices in portrait mode. -* Optimized for SSE2/SSSE3/AVX2 on x86/x64. +* Optimized for SSSE3/AVX2 on x86/x64. * Optimized for Neon on Arm. -* Optimized for DSP R2 on Mips. +* Optimized for MSA on Mips. ### Development diff --git a/chromium/third_party/libyuv/docs/environment_variables.md b/chromium/third_party/libyuv/docs/environment_variables.md index 9071c54de23..c28d83e7dc1 100644 --- a/chromium/third_party/libyuv/docs/environment_variables.md +++ b/chromium/third_party/libyuv/docs/environment_variables.md @@ -17,7 +17,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. 
But LIBYUV_DISABLE_AVX512BW LIBYUV_DISABLE_ERMS LIBYUV_DISABLE_FMA3 - LIBYUV_DISABLE_DSPR2 + LIBYUV_DISABLE_MSA LIBYUV_DISABLE_NEON # Test Width/Height/Repeat diff --git a/chromium/third_party/libyuv/docs/formats.md b/chromium/third_party/libyuv/docs/formats.md index 2b75d31ac75..3973e5d5ad8 100644 --- a/chromium/third_party/libyuv/docs/formats.md +++ b/chromium/third_party/libyuv/docs/formats.md @@ -35,9 +35,8 @@ This is how OSX formats map to libyuv # FOURCC (Four Charactacter Code) List The following is extracted from video_common.h as a complete list of formats supported by libyuv. - enum FourCC { - // 8 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. + // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), @@ -46,37 +45,34 @@ The following is extracted from video_common.h as a complete list of formats sup FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), + FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb - // 1 Secondary YUV formats: row biplanar. + // 1 Secondary YUV format: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), - // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. + // 10 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), + FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010. FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), - FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), + FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE. FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. - // 4 Secondary RGB formats: 4 Bayer Patterns. - FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), - FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), - FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), - FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), - // 1 Primary Compressed YUV format. FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), - // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. + // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. FOURCC_J420 = FOURCC('J', '4', '2', '0'), - FOURCC_J400 = FOURCC('J', '4', '0', '0'), + FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc + FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. @@ -97,9 +93,6 @@ The following is extracted from video_common.h as a complete list of formats sup FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP. FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO. - // 1 Auxiliary compressed YUV format set aside for capturer. 
- FOURCC_H264 = FOURCC('H', '2', '6', '4'), - # Planar YUV The following formats contains a full size Y plane followed by 1 or 2 planes for UV: I420, I422, I444, I400, NV21, NV12, I400 @@ -145,3 +138,13 @@ There are 2 RGB layouts - RGB24 (aka 24BG) and RAW RGB24 is B,G,R in memory RAW is R,G,B in memory + +# AR30 + +AR30 is 2 10 10 10 ARGB stored in little endian order. +The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values. +0 - 0. 00000000b = 0x00 = 0 +1 - 33%. 01010101b = 0x55 = 85 +2 - 66%. 10101010b = 0xaa = 170 +3 - 100%. 11111111b = 0xff = 255 +The 10 bit RGB values range from 0 to 1023. diff --git a/chromium/third_party/libyuv/docs/getting_started.md b/chromium/third_party/libyuv/docs/getting_started.md index 58e05f3cbcb..fefffce4f1f 100644 --- a/chromium/third_party/libyuv/docs/getting_started.md +++ b/chromium/third_party/libyuv/docs/getting_started.md @@ -129,15 +129,10 @@ ia32 ninja -v -C out/Debug libyuv_unittest ninja -v -C out/Release libyuv_unittest -mipsel +mips - gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" - gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" - ninja -v -C out/Debug libyuv_unittest - ninja -v -C out/Release libyuv_unittest - - gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" - gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" + gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true" + gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true" ninja -v -C out/Debug libyuv_unittest ninja -v -C out/Release libyuv_unittest diff --git a/chromium/third_party/libyuv/include/libyuv/convert.h b/chromium/third_party/libyuv/include/libyuv/convert.h index f096d193a74..d310f8493bf 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert.h +++ b/chromium/third_party/libyuv/include/libyuv/convert.h @@ -77,6 +77,43 @@ int I420Copy(const uint8* src_y, int width, int height); +// Copy I010 to I010 +#define I010ToI010 I010Copy +#define H010ToH010 I010Copy +LIBYUV_API +int I010Copy(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint16* dst_y, + int dst_stride_y, + uint16* dst_u, + int dst_stride_u, + uint16* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert 10 bit YUV to 8 bit +#define H010ToH420 I010ToI420 +LIBYUV_API +int I010ToI420(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_y, + int dst_stride_y, + uint8* dst_u, + int dst_stride_u, + uint8* dst_v, + int dst_stride_v, + int width, + int height); + // Convert I400 (grey) to I420. 
LIBYUV_API int I400ToI420(const uint8* src_y, diff --git a/chromium/third_party/libyuv/include/libyuv/convert_argb.h b/chromium/third_party/libyuv/include/libyuv/convert_argb.h index f43a5060bdb..b8b57cb12b3 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert_argb.h +++ b/chromium/third_party/libyuv/include/libyuv/convert_argb.h @@ -58,11 +58,76 @@ int I420ToABGR(const uint8* src_y, int src_stride_u, const uint8* src_v, int src_stride_v, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Convert I010 to ABGR. +LIBYUV_API +int I010ToABGR(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H010 to ARGB. +LIBYUV_API +int H010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H010 to ABGR. +LIBYUV_API +int H010ToABGR(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height); + // Convert I422 to ARGB. LIBYUV_API int I422ToARGB(const uint8* src_y, @@ -190,6 +255,27 @@ int NV21ToARGB(const uint8* src_y, int width, int height); +// Convert NV12 to ABGR. +int NV12ToABGR(const uint8* src_y, + int src_stride_y, + const uint8* src_uv, + int src_stride_uv, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert NV21 to ABGR. +LIBYUV_API +int NV21ToABGR(const uint8* src_y, + int src_stride_y, + const uint8* src_uv, + int src_stride_uv, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height); + // Convert M420 to ARGB. LIBYUV_API int M420ToARGB(const uint8* src_m420, @@ -321,6 +407,32 @@ int H422ToABGR(const uint8* src_y, int width, int height); +// Convert H010 to ARGB. +LIBYUV_API +int H010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H010 to AR30. +LIBYUV_API +int H010ToAR30(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height); + // BGRA little endian (argb in memory) to ARGB. LIBYUV_API int BGRAToARGB(const uint8* src_frame, @@ -396,6 +508,15 @@ int ARGB4444ToARGB(const uint8* src_frame, int width, int height); +// Convert AR30 To ARGB. +LIBYUV_API +int AR30ToARGB(const uint8* src_ar30, + int src_stride_ar30, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height); + #ifdef HAVE_JPEG // src_width/height provided by capture // dst_width/height for clipping determine final size. 
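The AR30ToARGB entry point declared above pairs with the AR30 layout notes added to docs/formats.md. A minimal usage sketch, not part of this patch, assuming the prototype shown in this diff, the libyuv C++ namespace, and byte strides as used by the other 8 bit converters:

    #include "libyuv/convert_argb.h"  // AR30ToARGB is declared here by this patch.

    // Convert an AR30 (2:10:10:10, 32 bits per pixel) image to 8 bit ARGB.
    // Per docs/formats.md the 2 bit alpha expands to 0, 85, 170 or 255.
    bool Ar30ToArgbSketch(const uint8* src_ar30, uint8* dst_argb,
                          int width, int height) {
      const int src_stride_ar30 = width * 4;  // 4 bytes per AR30 pixel.
      const int dst_stride_argb = width * 4;  // 4 bytes per ARGB pixel.
      return libyuv::AR30ToARGB(src_ar30, src_stride_ar30, dst_argb,
                                dst_stride_argb, width, height) == 0;
    }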
@@ -410,6 +531,34 @@ int MJPGToARGB(const uint8* sample, int dst_height); #endif +// Convert Android420 to ARGB. +LIBYUV_API +int Android420ToARGB(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert Android420 to ABGR. +LIBYUV_API +int Android420ToABGR(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height); + // Convert camera sample to ARGB with cropping, rotation and vertical flip. // "src_size" is needed to parse MJPG. // "dst_stride_argb" number of bytes in a row of the dst_argb plane. diff --git a/chromium/third_party/libyuv/include/libyuv/convert_from.h b/chromium/third_party/libyuv/include/libyuv/convert_from.h index 237f68f57aa..b5a422903a5 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert_from.h +++ b/chromium/third_party/libyuv/include/libyuv/convert_from.h @@ -21,7 +21,22 @@ extern "C" { // See Also convert.h for conversions from formats to I420. -// I420Copy in convert to I420ToI420. +// Convert 8 bit YUV to 10 bit. +#define H420ToH010 I420ToI010 +int I420ToI010(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + uint16* dst_y, + int dst_stride_y, + uint16* dst_u, + int dst_stride_u, + uint16* dst_v, + int dst_stride_v, + int width, + int height); LIBYUV_API int I420ToI422(const uint8* src_y, @@ -276,6 +291,18 @@ int I420ToARGB4444(const uint8* src_y, int dst_stride_frame, int width, int height); +// Convert I420 to AR30. +LIBYUV_API +int I420ToAR30(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height); // Convert I420 to specified format. // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the diff --git a/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h b/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h index 50722d76102..4d613502a16 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h +++ b/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h @@ -55,6 +55,15 @@ int ARGBToRGBA(const uint8* src_argb, int width, int height); +// Convert ARGB To AR30. +LIBYUV_API +int ARGBToAR30(const uint8* src_argb, + int src_stride_argb, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height); + // Convert ARGB To RGB24. LIBYUV_API int ARGBToRGB24(const uint8* src_argb, diff --git a/chromium/third_party/libyuv/include/libyuv/cpu_id.h b/chromium/third_party/libyuv/include/libyuv/cpu_id.h index c2e9bbbd954..14f735f57b2 100644 --- a/chromium/third_party/libyuv/include/libyuv/cpu_id.h +++ b/chromium/third_party/libyuv/include/libyuv/cpu_id.h @@ -47,8 +47,7 @@ static const int kCpuHasAVX512VPOPCNTDQ = 0x100000; // These flags are only valid on MIPS processors. static const int kCpuHasMIPS = 0x200000; -static const int kCpuHasDSPR2 = 0x400000; -static const int kCpuHasMSA = 0x800000; +static const int kCpuHasMSA = 0x400000; // Optional init function. TestCpuFlag does an auto-init. // Returns cpu_info flags. 
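The 10 bit plane conversions added above (I010Copy and I010ToI420 in convert.h, I420ToI010 in convert_from.h, plus the H010 aliases) can be chained for an 8 bit round trip. A small sketch, not part of this patch; it assumes the prototypes shown in this diff and that strides for uint16 planes are counted in uint16 elements, mirroring the byte strides of the 8 bit planes:

    #include <vector>

    #include "libyuv/convert.h"       // I010ToI420, added by this patch.
    #include "libyuv/convert_from.h"  // I420ToI010, added by this patch.

    // Round-trip an 8 bit I420 frame through the new 10 bit I010 layout.
    bool I010RoundTripSketch(const uint8* y, const uint8* u, const uint8* v,
                             int width, int height,
                             uint8* out_y, uint8* out_u, uint8* out_v) {
      const int half_w = (width + 1) / 2;
      const int half_h = (height + 1) / 2;
      std::vector<uint16> y10(width * height);
      std::vector<uint16> u10(half_w * half_h);
      std::vector<uint16> v10(half_w * half_h);
      if (libyuv::I420ToI010(y, width, u, half_w, v, half_w,
                             y10.data(), width, u10.data(), half_w,
                             v10.data(), half_w, width, height) != 0) {
        return false;
      }
      return libyuv::I010ToI420(y10.data(), width, u10.data(), half_w,
                                v10.data(), half_w,
                                out_y, width, out_u, half_w, out_v, half_w,
                                width, height) == 0;
    }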
diff --git a/chromium/third_party/libyuv/include/libyuv/planar_functions.h b/chromium/third_party/libyuv/include/libyuv/planar_functions.h index c91501a9c2c..653b0619710 100644 --- a/chromium/third_party/libyuv/include/libyuv/planar_functions.h +++ b/chromium/third_party/libyuv/include/libyuv/planar_functions.h @@ -39,6 +39,24 @@ void CopyPlane_16(const uint16* src_y, int width, int height); +LIBYUV_API +void Convert16To8Plane(const uint16* src_y, + int src_stride_y, + uint8* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height); + +LIBYUV_API +void Convert8To16Plane(const uint8* src_y, + int src_stride_y, + uint16* dst_y, + int dst_stride_y, + int scale, // 1024 for 10 bits + int width, + int height); + // Set a plane of data to a 32 bit value. LIBYUV_API void SetPlane(uint8* dst_y, diff --git a/chromium/third_party/libyuv/include/libyuv/rotate_row.h b/chromium/third_party/libyuv/include/libyuv/rotate_row.h index 973fc15284f..7e9dfd2cf4e 100644 --- a/chromium/third_party/libyuv/include/libyuv/rotate_row.h +++ b/chromium/third_party/libyuv/include/libyuv/rotate_row.h @@ -54,12 +54,6 @@ extern "C" { #define HAS_TRANSPOSEUVWX8_NEON #endif -#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_TRANSPOSEWX8_DSPR2 -#define HAS_TRANSPOSEUVWX8_DSPR2 -#endif // defined(__mips__) - #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_TRANSPOSEWX16_MSA #define HAS_TRANSPOSEUVWX16_MSA @@ -97,16 +91,6 @@ void TransposeWx8_Fast_SSSE3(const uint8* src, uint8* dst, int dst_stride, int width); -void TransposeWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width); -void TransposeWx8_Fast_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width); void TransposeWx16_MSA(const uint8* src, int src_stride, uint8* dst, @@ -128,11 +112,6 @@ void TransposeWx8_Fast_Any_SSSE3(const uint8* src, uint8* dst, int dst_stride, int width); -void TransposeWx8_Any_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width); void TransposeWx16_Any_MSA(const uint8* src, int src_stride, uint8* dst, @@ -176,13 +155,6 @@ void TransposeUVWx8_NEON(const uint8* src, uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst_a, - int dst_stride_a, - uint8* dst_b, - int dst_stride_b, - int width); void TransposeUVWx16_MSA(const uint8* src, int src_stride, uint8* dst_a, @@ -205,13 +177,6 @@ void TransposeUVWx8_Any_NEON(const uint8* src, uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_DSPR2(const uint8* src, - int src_stride, - uint8* dst_a, - int dst_stride_a, - uint8* dst_b, - int dst_stride_b, - int width); void TransposeUVWx16_Any_MSA(const uint8* src, int src_stride, uint8* dst_a, diff --git a/chromium/third_party/libyuv/include/libyuv/row.h b/chromium/third_party/libyuv/include/libyuv/row.h index 34d727641a8..992d2ceb5d5 100644 --- a/chromium/third_party/libyuv/include/libyuv/row.h +++ b/chromium/third_party/libyuv/include/libyuv/row.h @@ -37,14 +37,9 @@ extern "C" { // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) -#define LIBYUV_DISABLE_X86 -#endif +// define LIBYUV_DISABLE_X86 #endif -// True if compiling for SSSE3 as a requirement. 
-#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) -#define LIBYUV_SSSE3_ONLY #endif - #if defined(__native_client__) #define LIBYUV_DISABLE_NEON #endif @@ -85,7 +80,6 @@ extern "C" { #define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_ARGBSETROW_X86 -#define HAS_ARGBSHUFFLEROW_SSE2 #define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 @@ -268,6 +262,11 @@ extern "C" { // TODO(fbarchard): Port to Visual C #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) +#define HAS_ARGBTOAR30ROW_SSSE3 +#define HAS_CONVERT16TO8ROW_SSSE3 +#define HAS_CONVERT8TO16ROW_SSE2 +// I210 is for H010. 2 = 422. I for 601 vs H for 709. +#define HAS_I210TOARGBROW_SSSE3 #define HAS_MERGERGBROW_SSSE3 #define HAS_SPLITRGBROW_SSSE3 #endif @@ -277,6 +276,9 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#define HAS_ARGBTOAR30ROW_AVX2 +#define HAS_CONVERT16TO8ROW_AVX2 +#define HAS_CONVERT8TO16ROW_AVX2 #define HAS_MERGEUVROW_16_AVX2 #define HAS_MULTIPLYROW_16_AVX2 #endif @@ -376,37 +378,6 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #define HAS_SCALESUMSAMPLES_NEON #endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6) -#define HAS_COPYROW_MIPS -#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_I422TOARGBROW_DSPR2 -#define HAS_INTERPOLATEROW_DSPR2 -#define HAS_MIRRORROW_DSPR2 -#define HAS_MIRRORUVROW_DSPR2 -#define HAS_SPLITUVROW_DSPR2 -#define HAS_RGB24TOARGBROW_DSPR2 -#define HAS_RAWTOARGBROW_DSPR2 -#define HAS_RGB565TOARGBROW_DSPR2 -#define HAS_ARGB1555TOARGBROW_DSPR2 -#define HAS_ARGB4444TOARGBROW_DSPR2 -#define HAS_I444TOARGBROW_DSPR2 -#define HAS_I422TOARGB4444ROW_DSPR2 -#define HAS_I422TOARGB1555ROW_DSPR2 -#define HAS_NV12TOARGBROW_DSPR2 -#define HAS_BGRATOUVROW_DSPR2 -#define HAS_BGRATOYROW_DSPR2 -#define HAS_ABGRTOUVROW_DSPR2 -#define HAS_ARGBTOYROW_DSPR2 -#define HAS_ABGRTOYROW_DSPR2 -#define HAS_RGBATOUVROW_DSPR2 -#define HAS_RGBATOYROW_DSPR2 -#define HAS_ARGBTOUVROW_DSPR2 -#endif -#endif - #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_ABGRTOUVROW_MSA #define HAS_ABGRTOYROW_MSA @@ -793,29 +764,6 @@ void I444ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB4444Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB1555Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, @@ -1017,30 +965,6 @@ void RGB24ToYRow_MSA(const uint8* src_rgb24, uint8* dst_y, int width); void RAWToYRow_MSA(const uint8* src_raw, uint8* dst_y, int width); void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width); void 
ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width); -void BGRAToUVRow_DSPR2(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, - int width); -void BGRAToYRow_DSPR2(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToUVRow_DSPR2(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, - int width); -void ARGBToYRow_DSPR2(const uint8* src_argb, uint8* dst_y, int width); -void ABGRToYRow_DSPR2(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToUVRow_DSPR2(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, - int width); -void RGBAToYRow_DSPR2(const uint8* src_rgba, uint8* dst_y, int width); -void ARGBToUVRow_DSPR2(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, - int width); void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width); void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width); void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width); @@ -1069,10 +993,6 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width); void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int width); -void BGRAToYRow_Any_DSPR2(const uint8* src_bgra, uint8* dst_y, int width); -void ARGBToYRow_Any_DSPR2(const uint8* src_argb, uint8* dst_y, int width); -void ABGRToYRow_Any_DSPR2(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_DSPR2(const uint8* src_rgba, uint8* dst_y, int width); void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int width); @@ -1259,26 +1179,6 @@ void ARGB1555ToUVRow_Any_MSA(const uint8* src_argb1555, uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, - int width); -void ABGRToUVRow_Any_DSPR2(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, - int width); -void RGBAToUVRow_Any_DSPR2(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, - int width); -void ARGBToUVRow_Any_DSPR2(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, - int width); void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, uint8* dst_u, @@ -1357,7 +1257,6 @@ void ARGBToUV444Row_C(const uint8* src_argb, void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width); void MirrorRow_MSA(const uint8* src, uint8* dst, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); @@ -1374,10 +1273,6 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void MirrorUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width); void MirrorUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, @@ -1407,10 +1302,6 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width); void SplitUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, @@ -1424,10 +1315,6 @@ void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_Any_DSPR2(const uint8* src_uv, - uint8* dst_u, - 
uint8* dst_v, - int width); void SplitUVRow_Any_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, @@ -1539,6 +1426,42 @@ void MultiplyRow_16_AVX2(const uint16* src_y, int width); void MultiplyRow_16_C(const uint16* src_y, uint16* dst_y, int scale, int width); +void Convert8To16Row_C(const uint8* src_y, uint16* dst_y, int scale, int width); +void Convert8To16Row_SSE2(const uint8* src_y, + uint16* dst_y, + int scale, + int width); +void Convert8To16Row_AVX2(const uint8* src_y, + uint16* dst_y, + int scale, + int width); +void Convert8To16Row_Any_SSE2(const uint8* src_y, + uint16* dst_y, + int scale, + int width); +void Convert8To16Row_Any_AVX2(const uint8* src_y, + uint16* dst_y, + int scale, + int width); + +void Convert16To8Row_C(const uint16* src_y, uint8* dst_y, int scale, int width); +void Convert16To8Row_SSSE3(const uint16* src_y, + uint8* dst_y, + int scale, + int width); +void Convert16To8Row_AVX2(const uint16* src_y, + uint8* dst_y, + int scale, + int width); +void Convert16To8Row_Any_SSSE3(const uint16* src_y, + uint8* dst_y, + int scale, + int width); +void Convert16To8Row_Any_AVX2(const uint16* src_y, + uint8* dst_y, + int scale, + int width); + void CopyRow_SSE2(const uint8* src, uint8* dst, int count); void CopyRow_AVX(const uint8* src, uint8* dst, int count); void CopyRow_ERMS(const uint8* src, uint8* dst, int count); @@ -1609,10 +1532,6 @@ void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int width); -void ARGBShuffleRow_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width); void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, @@ -1629,10 +1548,6 @@ void ARGBShuffleRow_MSA(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int width); -void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width); void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, @@ -1685,15 +1600,6 @@ void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555, void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, int width); -void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width); -void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555, - uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444, - uint8* dst_argb, - int width); void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb, int width); @@ -1703,6 +1609,7 @@ void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width); void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width); void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); +void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width); void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width); @@ -1751,19 +1658,6 @@ void ARGB1555ToARGBRow_Any_MSA(const uint8* src_argb1555, void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, int width); -void RGB24ToARGBRow_Any_DSPR2(const uint8* src_rgb24, - uint8* dst_argb, - int width); -void RAWToARGBRow_Any_DSPR2(const uint8* src_raw, uint8* dst_argb, int width); -void RGB565ToARGBRow_Any_DSPR2(const uint8* src_rgb565, - uint8* dst_argb, - 
int width); -void ARGB1555ToARGBRow_Any_DSPR2(const uint8* src_argb1555, - uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_Any_DSPR2(const uint8* src_argb4444, - uint8* dst_argb, - int width); void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444, uint8* dst_argb, @@ -1774,6 +1668,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToAR30Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, @@ -1791,6 +1686,7 @@ void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToAR30Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width); @@ -1817,6 +1713,7 @@ void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width); void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); @@ -1840,9 +1737,9 @@ void I422ToARGBRow_C(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, +void I210ToARGBRow_C(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); @@ -1912,12 +1809,6 @@ void I422ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1954,6 +1845,13 @@ void I422ToARGBRow_SSSE3(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); + +void I210ToARGBRow_SSSE3(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -1968,12 +1866,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void NV12ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, @@ -2104,6 +1996,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); +void 
I210ToARGBRow_Any_SSSE3(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2399,6 +2297,7 @@ void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToAR30Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, @@ -2416,6 +2315,7 @@ void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToAR30Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); @@ -2516,53 +2416,6 @@ void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB4444Row_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB1555Row_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I411ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I444ToARGBRow_Any_MSA(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -2905,9 +2758,6 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, - uint8* dst_argb, - int width); void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); @@ -3061,11 +2911,6 @@ void InterpolateRow_NEON(uint8* dst_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_DSPR2(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, - int source_y_fraction); void InterpolateRow_MSA(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, @@ -3086,11 +2931,6 @@ void InterpolateRow_Any_AVX2(uint8* dst_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void 
InterpolateRow_Any_DSPR2(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, - int source_y_fraction); void InterpolateRow_Any_MSA(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, diff --git a/chromium/third_party/libyuv/include/libyuv/scale_row.h b/chromium/third_party/libyuv/include/libyuv/scale_row.h index c4a66aa07b1..3db46d399ea 100644 --- a/chromium/third_party/libyuv/include/libyuv/scale_row.h +++ b/chromium/third_party/libyuv/include/libyuv/scale_row.h @@ -94,16 +94,6 @@ extern "C" { #define HAS_SCALEARGBFILTERCOLS_NEON #endif -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_SCALEROWDOWN2_DSPR2 -#define HAS_SCALEROWDOWN4_DSPR2 -#define HAS_SCALEROWDOWN34_DSPR2 -#define HAS_SCALEROWDOWN38_DSPR2 -#define HAS_SCALEADDROW_DSPR2 -#endif - #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_SCALEADDROW_MSA #define HAS_SCALEARGBCOLS_MSA @@ -831,51 +821,6 @@ void ScaleFilterCols_Any_NEON(uint8* dst_ptr, int x, int dx); -void ScaleRowDown2_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown4_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown34_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width); -void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width); -void ScaleRowDown38_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width); -void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width); -void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_DSPR2(const uint8* src_ptr, - uint16* dst_ptr, - int src_width); - void ScaleRowDown2_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, diff --git a/chromium/third_party/libyuv/include/libyuv/version.h b/chromium/third_party/libyuv/include/libyuv/version.h index 838c70f1349..97595e58ffc 100644 --- a/chromium/third_party/libyuv/include/libyuv/version.h +++ b/chromium/third_party/libyuv/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1678 +#define LIBYUV_VERSION 1688 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/chromium/third_party/libyuv/include/libyuv/video_common.h b/chromium/third_party/libyuv/include/libyuv/video_common.h index f3711c423ba..e3c180f167d 100644 --- a/chromium/third_party/libyuv/include/libyuv/video_common.h +++ b/chromium/third_party/libyuv/include/libyuv/video_common.h @@ -49,25 +49,25 @@ extern "C" { // Secondary formats are converted in 2 steps. // Auxilliary formats call primary converters. enum FourCC { - // 8 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. + // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. 
FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), - FOURCC_I411 = FOURCC('I', '4', '1', '1'), // deprecated. FOURCC_I400 = FOURCC('I', '4', '0', '0'), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), + FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb // 1 Secondary YUV format: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), - FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated. - // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. + // 10 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), + FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010. FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), @@ -75,16 +75,10 @@ enum FourCC { FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. - // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated. - FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), - FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), - FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), - FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), - // 1 Primary Compressed YUV format. FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), - // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. + // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), @@ -112,7 +106,13 @@ enum FourCC { FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP. FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO. - // 1 Auxiliary compressed YUV format set aside for capturer. + // deprecated formats. Not supported, but defined for backward compatibility. + FOURCC_I411 = FOURCC('I', '4', '1', '1'), + FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), + FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), + FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), + FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), + FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), FOURCC_H264 = FOURCC('H', '2', '6', '4'), // Match any fourcc. @@ -136,6 +136,7 @@ enum FourCCBpp { FOURCC_BPP_BGRA = 32, FOURCC_BPP_ABGR = 32, FOURCC_BPP_RGBA = 32, + FOURCC_BPP_AR30 = 32, FOURCC_BPP_24BG = 24, FOURCC_BPP_RAW = 24, FOURCC_BPP_RGBP = 16, @@ -152,6 +153,7 @@ enum FourCCBpp { FOURCC_BPP_J420 = 12, FOURCC_BPP_J400 = 8, FOURCC_BPP_H420 = 12, + FOURCC_BPP_H010 = 24, FOURCC_BPP_MJPG = 0, // 0 means unknown. FOURCC_BPP_H264 = 0, FOURCC_BPP_IYUV = 12, diff --git a/chromium/third_party/libyuv/libyuv.gyp b/chromium/third_party/libyuv/libyuv.gyp index f73a1a4b745..e853ba3197a 100644 --- a/chromium/third_party/libyuv/libyuv.gyp +++ b/chromium/third_party/libyuv/libyuv.gyp @@ -121,7 +121,6 @@ # Enable the following 3 macros to turn off assembly for specified CPU. # 'LIBYUV_DISABLE_X86', # 'LIBYUV_DISABLE_NEON', - # 'LIBYUV_DISABLE_DSPR2', # Enable the following macro to build libyuv as a shared library (dll). # 'LIBYUV_USING_SHARED_LIBRARY', # TODO(fbarchard): Make these into gyp defines. 
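The video_common.h hunks above add FOURCC_H010 and FOURCC_AR30 together with their average bits per pixel (FOURCC_BPP_H010 = 24, FOURCC_BPP_AR30 = 32). A short arithmetic sketch, not part of this patch, showing where those numbers come from:

    // H010 stores 10 bit samples in 16 bit words with 4:2:0 subsampling:
    // one Y sample per pixel plus one U and one V sample per 2x2 block.
    constexpr int kBitsPerStoredSample = 16;  // 10 significant bits, 16 bit storage.
    constexpr int kH010BitsPerPixel =
        kBitsPerStoredSample + kBitsPerStoredSample / 4 + kBitsPerStoredSample / 4;
    static_assert(kH010BitsPerPixel == 24, "matches FOURCC_BPP_H010");

    // AR30 is a packed 2:10:10:10 format, so it is a flat 32 bits per pixel.
    static_assert(2 + 10 + 10 + 10 == 32, "matches FOURCC_BPP_AR30");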
diff --git a/chromium/third_party/libyuv/libyuv.gypi b/chromium/third_party/libyuv/libyuv.gypi index ec81bc9bbab..9467adfc515 100644 --- a/chromium/third_party/libyuv/libyuv.gypi +++ b/chromium/third_party/libyuv/libyuv.gypi @@ -55,7 +55,6 @@ 'source/rotate_argb.cc', 'source/rotate_common.cc', 'source/rotate_gcc.cc', - 'source/rotate_dspr2.cc', 'source/rotate_msa.cc', 'source/rotate_neon.cc', 'source/rotate_neon64.cc', @@ -63,7 +62,6 @@ 'source/row_any.cc', 'source/row_common.cc', 'source/row_gcc.cc', - 'source/row_dspr2.cc', 'source/row_msa.cc', 'source/row_neon.cc', 'source/row_neon64.cc', @@ -73,7 +71,6 @@ 'source/scale_argb.cc', 'source/scale_common.cc', 'source/scale_gcc.cc', - 'source/scale_dspr2.cc', 'source/scale_msa.cc', 'source/scale_neon.cc', 'source/scale_neon64.cc', diff --git a/chromium/third_party/libyuv/libyuv_test.gyp b/chromium/third_party/libyuv/libyuv_test.gyp index 4222cf26da7..5fe154c610a 100644 --- a/chromium/third_party/libyuv/libyuv_test.gyp +++ b/chromium/third_party/libyuv/libyuv_test.gyp @@ -100,7 +100,6 @@ # Enable the following 3 macros to turn off assembly for specified CPU. # 'LIBYUV_DISABLE_X86', # 'LIBYUV_DISABLE_NEON', - # 'LIBYUV_DISABLE_DSPR2', # Enable the following macro to build libyuv as a shared library (dll). # 'LIBYUV_USING_SHARED_LIBRARY', ], diff --git a/chromium/third_party/libyuv/linux.mk b/chromium/third_party/libyuv/linux.mk index 7e9aa5e4e8b..b84c89f912e 100644 --- a/chromium/third_party/libyuv/linux.mk +++ b/chromium/third_party/libyuv/linux.mk @@ -32,14 +32,12 @@ LOCAL_OBJ_FILES := \ source/rotate.o \ source/rotate_common.o \ source/rotate_gcc.o \ - source/rotate_dspr2.o \ source/rotate_neon64.o \ source/rotate_neon.o \ source/rotate_win.o \ source/row_any.o \ source/row_common.o \ source/row_gcc.o \ - source/row_dspr2.o \ source/row_neon64.o \ source/row_neon.o \ source/row_win.o \ @@ -48,7 +46,6 @@ LOCAL_OBJ_FILES := \ source/scale.o \ source/scale_common.o \ source/scale_gcc.o \ - source/scale_dspr2.o \ source/scale_neon64.o \ source/scale_neon.o \ source/scale_win.o \ diff --git a/chromium/third_party/libyuv/source/compare_gcc.cc b/chromium/third_party/libyuv/source/compare_gcc.cc index 595c8ec4ae2..49b471af1a0 100644 --- a/chromium/third_party/libyuv/source/compare_gcc.cc +++ b/chromium/third_party/libyuv/source/compare_gcc.cc @@ -111,9 +111,9 @@ uint32 HammingDistance_SSE42(const uint8* src_a, } #endif -static vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15}; -static vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; +static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15}; +static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; uint32 HammingDistance_SSSE3(const uint8* src_a, const uint8* src_b, @@ -267,26 +267,26 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { return sse; } -static uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16 -static uvec32 kHashMul0 = { +static const uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16 +static const uvec32 kHashMul0 = { 0x0c3525e1, // 33 ^ 15 0xa3476dc1, // 33 ^ 14 0x3b4039a1, // 33 ^ 13 0x4f5f0981, // 33 ^ 12 }; -static uvec32 kHashMul1 = { +static const uvec32 kHashMul1 = { 0x30f35d61, // 33 ^ 11 0x855cb541, // 33 ^ 10 0x040a9121, // 33 ^ 9 0x747c7101, // 33 ^ 8 }; -static uvec32 kHashMul2 = { +static const uvec32 kHashMul2 = { 0xec41d4e1, // 33 ^ 7 0x4cfa3cc1, // 33 ^ 6 0x025528a1, // 33 ^ 5 0x00121881, // 33 ^ 4 }; -static 
uvec32 kHashMul3 = { +static const uvec32 kHashMul3 = { 0x00008c61, // 33 ^ 3 0x00000441, // 33 ^ 2 0x00000021, // 33 ^ 1 diff --git a/chromium/third_party/libyuv/source/convert.cc b/chromium/third_party/libyuv/source/convert.cc index dfa83a5a6d8..fd2066e29eb 100644 --- a/chromium/third_party/libyuv/source/convert.cc +++ b/chromium/third_party/libyuv/source/convert.cc @@ -62,7 +62,7 @@ static int I4xxToI420(const uint8* src_y, return 0; } -// Copy I420 with optional flipping +// Copy I420 with optional flipping. // TODO(fbarchard): Use Scale plane which supports mirroring, but ensure // is does row coalescing. LIBYUV_API @@ -106,6 +106,92 @@ int I420Copy(const uint8* src_y, return 0; } +// Copy I010 with optional flipping. +LIBYUV_API +int I010Copy(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint16* dst_y, + int dst_stride_y, + uint16* dst_u, + int dst_stride_u, + uint16* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + if (dst_y) { + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + // Copy UV planes. + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); + return 0; +} + +// Convert 10 bit YUV to 8 bit. +LIBYUV_API +int I010ToI420(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_y, + int dst_stride_y, + uint8* dst_u, + int dst_stride_u, + uint8* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + // Convert Y plane. + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width, + height); + // Convert UV planes. + Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth, + halfheight); + Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth, + halfheight); + return 0; +} + // 422 chroma is 1/2 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API @@ -212,11 +298,6 @@ static void CopyPlane2(const uint8* src, CopyRow = IS_ALIGNED(width, 32) ? 
CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Copy plane for (y = 0; y < height - 1; y += 2) { @@ -579,14 +660,6 @@ int ARGBToI420(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; @@ -595,14 +668,6 @@ int ARGBToI420(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; @@ -680,22 +745,6 @@ int BGRAToI420(const uint8* src_bgra, } } #endif -#if defined(HAS_BGRATOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - BGRAToYRow = BGRAToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - BGRAToYRow = BGRAToYRow_DSPR2; - } - } -#endif -#if defined(HAS_BGRATOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - BGRAToUVRow = BGRAToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_DSPR2; - } - } -#endif #if defined(HAS_BGRATOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { BGRAToYRow = BGRAToYRow_Any_MSA; @@ -781,22 +830,6 @@ int ABGRToI420(const uint8* src_abgr, } } #endif -#if defined(HAS_ABGRTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ABGRToYRow = ABGRToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ABGRToYRow = ABGRToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ABGRToUVRow = ABGRToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_DSPR2; - } - } -#endif #if defined(HAS_ABGRTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ABGRToYRow = ABGRToYRow_Any_MSA; @@ -882,22 +915,6 @@ int RGBAToI420(const uint8* src_rgba, } } #endif -#if defined(HAS_RGBATOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGBAToYRow = RGBAToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGBAToYRow = RGBAToYRow_DSPR2; - } - } -#endif -#if defined(HAS_RGBATOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGBAToUVRow = RGBAToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_DSPR2; - } - } -#endif #if defined(HAS_RGBATOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGBAToYRow = RGBAToYRow_Any_MSA; @@ -1287,14 +1304,6 @@ int RGB565ToI420(const uint8* src_rgb565, } } #endif -#if defined(HAS_RGB565TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_DSPR2; - } - } -#endif #endif { #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) diff --git a/chromium/third_party/libyuv/source/convert_argb.cc b/chromium/third_party/libyuv/source/convert_argb.cc index 5007bdb9708..9b93fc15194 100644 --- a/chromium/third_party/libyuv/source/convert_argb.cc +++ b/chromium/third_party/libyuv/source/convert_argb.cc @@ -47,7 +47,7 @@ int ARGBCopy(const uint8* src_argb, return 0; } -// Convert I422 to ARGB with matrix +// Convert I420 to ARGB with matrix static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y, const uint8* src_u, @@ -97,15 +97,6 @@ static int I420ToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && 
IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; @@ -292,15 +283,6 @@ static int I422ToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; @@ -428,6 +410,218 @@ int H422ToABGR(const uint8* src_y, width, height); } +// Convert 10 bit YUV to ARGB with matrix +// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to +// multiply 10 bit yuv into high bits to allow any number of bits. +static int H010ToAR30Matrix(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf, + const uint16* v_buf, uint8* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I210ToARGBRow_C; + void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) = + ARGBToAR30Row_C; + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } +#if defined(HAS_I210TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I210ToARGBRow = I210ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I210ToARGBRow = I210ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAR30Row = ARGBToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR30Row = ARGBToAR30Row_AVX2; + } + } +#endif + + { + // Row buffers for 8 bit YUV and RGB. + align_buffer_64(row_argb, width * 4); + + for (y = 0; y < height; ++y) { + I210ToARGBRow(src_y, src_u, src_v, row_argb, yuvconstants, width); + ARGBToAR30Row(row_argb, dst_ar30, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + + free_aligned_buffer_64(row_argb); + } + + return 0; +} + +// Convert H010 to AR30. 
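// (Editorial usage sketch, not part of the patch) Calling the H010ToAR30
// entry point declared just below. Plane strides for 16-bit data are assumed
// to be counted in uint16 elements, matching the pointer arithmetic in
// H010ToAR30Matrix above; the wrapper name TenBitToAR30 is illustrative only.
#include "libyuv/convert_argb.h"
#include <vector>

bool TenBitToAR30(const uint16* y, const uint16* u, const uint16* v,
                  int width, int height, std::vector<uint8>* ar30) {
  ar30->resize(static_cast<size_t>(width) * height * 4);  // 32 bpp packed.
  const int half = (width + 1) / 2;                        // 4:2:0 chroma.
  return libyuv::H010ToAR30(y, width, u, half, v, half,
                            ar30->data(), width * 4,       // byte stride.
                            width, height) == 0;
}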
+LIBYUV_API +int H010ToAR30(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return H010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYuvH709Constants, width, height); +} + +// Convert 10 bit YUV to ARGB with matrix +static int I010ToARGBMatrix(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf, + const uint16* v_buf, uint8* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I210ToARGBRow_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I210TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I210ToARGBRow = I210ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I210ToARGBRow = I210ToARGBRow_SSSE3; + } + } +#endif + + for (y = 0; y < height; ++y) { + I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvI601Constants, width, height); +} + +// Convert I010 to ABGR. +LIBYUV_API +int I010ToABGR(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + +// Convert H010 to ARGB. +LIBYUV_API +int H010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvH709Constants, width, height); +} + +// Convert H010 to ABGR. 
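// (Editorial note, not part of the patch) The ABGR variants above need no
// dedicated row code: they call I010ToARGBMatrix with the U and V planes
// swapped and the YVU constant set, which flips the output channel order.
// A minimal BT.601 call through the plain ARGB path, with the same stride
// assumptions as the sketch above; TenBitToARGB is an illustrative name.
#include "libyuv/convert_argb.h"

int TenBitToARGB(const uint16* y, const uint16* u, const uint16* v,
                 int width, int height, uint8* dst_argb /* width*4 stride */) {
  const int half = (width + 1) / 2;
  return libyuv::I010ToARGB(y, width, u, half, v, half,
                            dst_argb, width * 4, width, height);
}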
+LIBYUV_API +int H010ToABGR(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvuH709Constants, // Use Yvu matrix + width, height); +} + // Convert I444 to ARGB with matrix static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, @@ -485,14 +679,6 @@ static int I444ToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I444TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - I444ToARGBRow = I444ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_I444TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I444ToARGBRow = I444ToARGBRow_Any_MSA; @@ -621,15 +807,6 @@ static int I420AlphaToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422ALPHATOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422ALPHATOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA; @@ -865,16 +1042,16 @@ int J400ToARGB(const uint8* src_y, } // Shuffle table for converting BGRA to ARGB. -static uvec8 kShuffleMaskBGRAToARGB = {3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, - 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u}; +static const uvec8 kShuffleMaskBGRAToARGB = { + 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u}; // Shuffle table for converting ABGR to ARGB. -static uvec8 kShuffleMaskABGRToARGB = {2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, - 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u}; +static const uvec8 kShuffleMaskABGRToARGB = { + 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u}; // Shuffle table for converting RGBA to ARGB. -static uvec8 kShuffleMaskRGBAToARGB = {1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, - 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u}; +static const uvec8 kShuffleMaskRGBAToARGB = { + 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u}; // Convert BGRA to ARGB. 
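// (Editorial sketch, not part of the patch) What the shuffle tables above
// encode: each entry is the source byte index for that output byte within a
// 16-byte group, as consumed by pshufb/vtbl-style instructions. A scalar
// equivalent, with ShuffleBytes16 an illustrative helper only:
static void ShuffleBytes16(const uint8* src, const uint8* shuffler,
                           uint8* dst) {
  for (int i = 0; i < 16; ++i) {
    // e.g. kShuffleMaskBGRAToARGB reverses the byte order of each 4-byte pixel.
    dst[i] = src[shuffler[i]];
  }
}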
LIBYUV_API @@ -978,14 +1155,6 @@ int RGB24ToARGB(const uint8* src_rgb24, } } #endif -#if defined(HAS_RGB24TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_RGB24TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA; @@ -1045,14 +1214,6 @@ int RAWToARGB(const uint8* src_raw, } } #endif -#if defined(HAS_RAWTOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RAWToARGBRow = RAWToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_RAWTOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RAWToARGBRow = RAWToARGBRow_Any_MSA; @@ -1120,14 +1281,6 @@ int RGB565ToARGB(const uint8* src_rgb565, } } #endif -#if defined(HAS_RGB565TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_RGB565TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA; @@ -1195,14 +1348,6 @@ int ARGB1555ToARGB(const uint8* src_argb1555, } } #endif -#if defined(HAS_ARGB1555TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_ARGB1555TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA; @@ -1270,14 +1415,6 @@ int ARGB4444ToARGB(const uint8* src_argb4444, } } #endif -#if defined(HAS_ARGB4444TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_ARGB4444TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; @@ -1295,17 +1432,51 @@ int ARGB4444ToARGB(const uint8* src_argb4444, return 0; } -// Convert NV12 to ARGB. +// Convert AR30 to ARGB. LIBYUV_API -int NV12ToARGB(const uint8* src_y, - int src_stride_y, - const uint8* src_uv, - int src_stride_uv, +int AR30ToARGB(const uint8* src_ar30, + int src_stride_ar30, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; + void (*AR30ToARGBRow)(const uint8* src_ar30, uint8* dst_argb, int width) = + AR30ToARGBRow_C; + if (!src_ar30 || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; + src_stride_ar30 = -src_stride_ar30; + } + // Coalesce rows. 
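  // (Editorial note) With both strides equal to width * 4 the rows are
  // contiguous in memory, so the whole image can be handled as a single row
  // of width * height pixels; the strides are then zeroed because that one
  // pass never advances to another row.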
+ if (src_stride_ar30 == width * 4 && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_ar30 = dst_stride_argb = 0; + } + for (y = 0; y < height; ++y) { + AR30ToARGBRow(src_ar30, dst_argb, width); + src_ar30 += src_stride_ar30; + dst_argb += dst_stride_argb; + } + return 0; +} + +// Convert NV12 to ARGB with matrix +static int NV12ToARGBMatrix(const uint8* src_y, + int src_stride_y, + const uint8* src_uv, + int src_stride_uv, + uint8* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; @@ -1342,14 +1513,6 @@ int NV12ToARGB(const uint8* src_y, } } #endif -#if defined(HAS_NV12TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_NV12TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV12ToARGBRow = NV12ToARGBRow_Any_MSA; @@ -1360,7 +1523,7 @@ int NV12ToARGB(const uint8* src_y, #endif for (y = 0; y < height; ++y) { - NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width); + NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1370,16 +1533,16 @@ int NV12ToARGB(const uint8* src_y, return 0; } -// Convert NV21 to ARGB. -LIBYUV_API -int NV21ToARGB(const uint8* src_y, - int src_stride_y, - const uint8* src_uv, - int src_stride_uv, - uint8* dst_argb, - int dst_stride_argb, - int width, - int height) { +// Convert NV21 to ARGB with matrix +static int NV21ToARGBMatrix(const uint8* src_y, + int src_stride_y, + const uint8* src_uv, + int src_stride_uv, + uint8* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { int y; void (*NV21ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, const struct YuvConstants* yuvconstants, int width) = @@ -1427,7 +1590,7 @@ int NV21ToARGB(const uint8* src_y, #endif for (y = 0; y < height; ++y) { - NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width); + NV21ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1437,6 +1600,63 @@ int NV21ToARGB(const uint8* src_y, return 0; } +// Convert NV12 to ARGB. +LIBYUV_API +int NV12ToARGB(const uint8* src_y, + int src_stride_y, + const uint8* src_uv, + int src_stride_uv, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height) { + return NV12ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, + dst_stride_argb, &kYuvI601Constants, width, height); +} + +// Convert NV21 to ARGB. +LIBYUV_API +int NV21ToARGB(const uint8* src_y, + int src_stride_y, + const uint8* src_uv, + int src_stride_uv, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height) { + return NV21ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, + dst_stride_argb, &kYuvI601Constants, width, height); +} + +// Convert NV12 to ABGR. +// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix. 
+// To swap the UV use NV12 instead of NV21.LIBYUV_API +int NV12ToABGR(const uint8* src_y, + int src_stride_y, + const uint8* src_uv, + int src_stride_uv, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return NV21ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_abgr, + dst_stride_abgr, &kYvuI601Constants, width, height); +} + +// Convert NV21 to ABGR. +LIBYUV_API +int NV21ToABGR(const uint8* src_y, + int src_stride_y, + const uint8* src_vu, + int src_stride_vu, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return NV12ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_abgr, + dst_stride_abgr, &kYvuI601Constants, width, height); +} + // Convert M420 to ARGB. LIBYUV_API int M420ToARGB(const uint8* src_m420, @@ -1482,14 +1702,6 @@ int M420ToARGB(const uint8* src_m420, } } #endif -#if defined(HAS_NV12TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_NV12TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV12ToARGBRow = NV12ToARGBRow_Any_MSA; @@ -1647,6 +1859,120 @@ int UYVYToARGB(const uint8* src_uyvy, } return 0; } +static void WeavePixels(const uint8* src_u, + const uint8* src_v, + int src_pixel_stride_uv, + uint8* dst_uv, + int width) { + int i; + for (i = 0; i < width; ++i) { + dst_uv[0] = *src_u; + dst_uv[1] = *src_v; + dst_uv += 2; + src_u += src_pixel_stride_uv; + src_v += src_pixel_stride_uv; + } +} + +// Convert Android420 to ARGB. +LIBYUV_API +int Android420ToARGBMatrix(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + uint8* dst_uv; + const ptrdiff_t vu_off = src_v - src_u; + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + + // I420 + if (src_pixel_stride_uv == 1) { + return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + yuvconstants, width, height); + // NV21 + } else if (src_pixel_stride_uv == 2 && vu_off == -1 && + src_stride_u == src_stride_v) { + return NV21ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, dst_argb, + dst_stride_argb, yuvconstants, width, height); + // NV12 + } else if (src_pixel_stride_uv == 2 && vu_off == 1 && + src_stride_u == src_stride_v) { + return NV12ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, dst_argb, + dst_stride_argb, yuvconstants, width, height); + } + + // General case fallback creates NV12 + align_buffer_64(plane_uv, halfwidth * 2 * halfheight); + dst_uv = plane_uv; + for (y = 0; y < halfheight; ++y) { + WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth); + src_u += src_stride_u; + src_v += src_stride_v; + dst_uv += halfwidth * 2; + } + NV12ToARGBMatrix(src_y, src_stride_y, plane_uv, halfwidth * 2, dst_argb, + dst_stride_argb, yuvconstants, width, height); + free_aligned_buffer_64(plane_uv); + return 0; +} + +// Convert Android420 to ARGB. 
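// (Editorial usage sketch, not part of the patch) Feeding an Android
// YUV_420_888 image into the Android420ToARGB wrapper declared just below.
// Pixel stride 1 takes the I420 fast path, 2 the NV21/NV12 paths, and any
// other value the generic re-weave in Android420ToARGBMatrix above.
// Yuv420_888ToARGB is an illustrative wrapper name.
#include "libyuv/convert_argb.h"

int Yuv420_888ToARGB(const uint8* y, int y_stride,
                     const uint8* u, int u_stride,
                     const uint8* v, int v_stride,
                     int uv_pixel_stride,
                     uint8* dst_argb, int width, int height) {
  return libyuv::Android420ToARGB(y, y_stride, u, u_stride, v, v_stride,
                                  uv_pixel_stride, dst_argb, width * 4,
                                  width, height);
}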
+LIBYUV_API +int Android420ToARGB(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height) { + return Android420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, src_pixel_stride_uv, dst_argb, + dst_stride_argb, &kYuvI601Constants, width, + height); +} + +// Convert Android420 to ABGR. +LIBYUV_API +int Android420ToABGR(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return Android420ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, src_pixel_stride_uv, dst_abgr, + dst_stride_abgr, &kYvuI601Constants, width, + height); +} #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libyuv/source/convert_from.cc b/chromium/third_party/libyuv/source/convert_from.cc index 0f52f9ef9e0..9da607102f6 100644 --- a/chromium/third_party/libyuv/source/convert_from.cc +++ b/chromium/third_party/libyuv/source/convert_from.cc @@ -65,6 +65,50 @@ static int I420ToI4xx(const uint8* src_y, return 0; } +// Convert 8 bit YUV to 10 bit. +LIBYUV_API +int I420ToI010(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + uint16* dst_y, + int dst_stride_y, + uint16* dst_u, + int dst_stride_u, + uint16* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + // Convert Y plane. + Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 1024, width, + height); + // Convert UV planes. 
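  // (Editorial note) The scale of 1024 asks Convert8To16Plane for a 10-bit
  // result: each 8-bit sample is expanded so that full-range 255 lands near
  // 1023, occupying the low 10 bits of each 16-bit word as the new H010
  // fourcc expects.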
+ Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 1024, halfwidth, + halfheight); + Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 1024, halfwidth, + halfheight); + return 0; +} + // 420 chroma is 1/2 width, 1/2 height // 422 chroma is 1/2 width, 1x height LIBYUV_API @@ -484,15 +528,6 @@ static int I420ToRGBAMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TORGBAROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { - I422ToRGBARow = I422ToRGBARow_DSPR2; - } -#endif #if defined(HAS_I422TORGBAROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGBARow = I422ToRGBARow_Any_MSA; @@ -744,14 +779,6 @@ int I420ToARGB1555(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGB1555ROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - I422ToARGB1555Row = I422ToARGB1555Row_DSPR2; - } - } -#endif #if defined(HAS_I422TOARGB1555ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA; @@ -825,14 +852,6 @@ int I420ToARGB4444(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGB4444ROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - I422ToARGB4444Row = I422ToARGB4444Row_DSPR2; - } - } -#endif #if defined(HAS_I422TOARGB4444ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA; @@ -1057,14 +1076,6 @@ int I420ToRGB565Dither(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; @@ -1125,6 +1136,122 @@ int I420ToRGB565Dither(const uint8* src_y, return 0; } +// Convert I420 to AR30 with matrix +static int I420ToAR30Matrix(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, + const uint8* v_buf, uint8* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToARGBRow_C; + void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) = + ARGBToAR30Row_C; + + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
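  // (Editorial note) As elsewhere in libyuv, a negative height requests a
  // vertical flip: the destination pointer is moved to the last row and the
  // stride negated so the image is written bottom-up.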
+ if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } + +#if defined(HAS_ARGBTOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAR30Row = ARGBToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR30Row = ARGBToAR30Row_AVX2; + } + } +#endif +#if defined(HAS_I422TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I422TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToARGBRow = I422ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I422ToARGBRow = I422ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I422TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToARGBRow = I422ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I422TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGBRow = I422ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_MSA; + } + } +#endif + + { + // Row buffer for ARGB. + align_buffer_64(row_argb, width * 4); + + for (y = 0; y < height; ++y) { + I422ToARGBRow(src_y, src_u, src_v, row_argb, yuvconstants, width); + ARGBToAR30Row(row_argb, dst_ar30, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + + free_aligned_buffer_64(row_argb); + } + return 0; +} + +// Convert I420 to AR30. +LIBYUV_API +int I420ToAR30(const uint8* src_y, + int src_stride_y, + const uint8* src_u, + int src_stride_u, + const uint8* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYuvI601Constants, width, height); +} + // Convert I420 to specified format LIBYUV_API int ConvertFromI420(const uint8* y, @@ -1200,6 +1327,11 @@ int ConvertFromI420(const uint8* y, dst_sample_stride ? dst_sample_stride : width * 4, width, height); break; + case FOURCC_AR30: + r = I420ToAR30(y, y_stride, u, u_stride, v, v_stride, dst_sample, + dst_sample_stride ? dst_sample_stride : width * 4, width, + height); + break; case FOURCC_I400: r = I400Copy(y, y_stride, dst_sample, dst_sample_stride ? 
dst_sample_stride : width, width, diff --git a/chromium/third_party/libyuv/source/convert_from_argb.cc b/chromium/third_party/libyuv/source/convert_from_argb.cc index 88f38279ace..02e12a12804 100644 --- a/chromium/third_party/libyuv/source/convert_from_argb.cc +++ b/chromium/third_party/libyuv/source/convert_from_argb.cc @@ -100,14 +100,6 @@ int ARGBToI444(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; @@ -197,22 +189,6 @@ int ARGBToI422(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { @@ -344,22 +320,6 @@ int ARGBToNV12(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_MERGEUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MergeUVRow_ = MergeUVRow_Any_MSA; @@ -495,22 +455,6 @@ int ARGBToNV21(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_MERGEUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MergeUVRow_ = MergeUVRow_Any_MSA; @@ -643,22 +587,6 @@ int ARGBToYUY2(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_I422TOYUY2ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToYUY2Row = I422ToYUY2Row_Any_MSA; @@ -787,22 +715,6 @@ int ARGBToUYVY(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_I422TOUYVYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToUYVYRow = I422ToUYVYRow_Any_MSA; @@ -880,14 +792,6 @@ int ARGBToI400(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = 
ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; @@ -906,8 +810,8 @@ int ARGBToI400(const uint8* src_argb, } // Shuffle table for converting ARGB to RGBA. -static uvec8 kShuffleMaskARGBToRGBA = {3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, - 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u}; +static const uvec8 kShuffleMaskARGBToRGBA = { + 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u}; // Convert ARGB to RGBA. LIBYUV_API @@ -1308,6 +1212,55 @@ int ARGBToARGB4444(const uint8* src_argb, return 0; } +// Convert ARGB To AR30. +LIBYUV_API +int ARGBToAR30(const uint8* src_argb, + int src_stride_argb, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + int y; + void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) = + ARGBToAR30Row_C; + if (!src_argb || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ar30 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ar30 = 0; + } +#if defined(HAS_ARGBTOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAR30Row = ARGBToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR30Row = ARGBToAR30Row_AVX2; + } + } +#endif + for (y = 0; y < height; ++y) { + ARGBToAR30Row(src_argb, dst_ar30, width); + src_argb += src_stride_argb; + dst_ar30 += dst_stride_ar30; + } + return 0; +} + // Convert ARGB to J420. (JPeg full range I420). 
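// (Editorial usage sketch, not part of the patch) Packing 8-bit ARGB into
// the 2:10:10:10 AR30 layout via the ARGBToAR30 entry point added above.
// Both formats are 32 bpp, so a tightly packed stride is width * 4; the
// header is assumed to be convert_from_argb.h alongside the other ARGBTo*
// declarations, and PackArgbToAr30 is an illustrative name.
#include "libyuv/convert_from_argb.h"

int PackArgbToAr30(const uint8* src_argb, uint8* dst_ar30,
                   int width, int height) {
  return libyuv::ARGBToAR30(src_argb, width * 4, dst_ar30, width * 4,
                            width, height);
}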
LIBYUV_API int ARGBToJ420(const uint8* src_argb, diff --git a/chromium/third_party/libyuv/source/cpu_id.cc b/chromium/third_party/libyuv/source/cpu_id.cc index 344f3c06a2b..d08fc365988 100644 --- a/chromium/third_party/libyuv/source/cpu_id.cc +++ b/chromium/third_party/libyuv/source/cpu_id.cc @@ -179,7 +179,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, if (strcmp(ase, " msa") == 0) { return kCpuHasMSA; } - return kCpuHasDSPR2; + return 0; } while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) { @@ -189,7 +189,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, if (strcmp(ase, " msa") == 0) { return kCpuHasMSA; } - return kCpuHasDSPR2; + return 0; } } } @@ -290,16 +290,10 @@ static SAFEBUFFERS int GetCpuFlags(void) { #endif #if defined(__mips__) && defined(__linux__) -#if defined(__mips_dspr2) - cpu_info |= kCpuHasDSPR2; -#endif #if defined(__mips_msa) cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa"); #endif cpu_info |= kCpuHasMIPS; - if (getenv("LIBYUV_DISABLE_DSPR2")) { - cpu_info &= ~kCpuHasDSPR2; - } if (getenv("LIBYUV_DISABLE_MSA")) { cpu_info &= ~kCpuHasMSA; } diff --git a/chromium/third_party/libyuv/source/planar_functions.cc b/chromium/third_party/libyuv/source/planar_functions.cc index dd311d1f08e..c55ef7f2742 100644 --- a/chromium/third_party/libyuv/source/planar_functions.cc +++ b/chromium/third_party/libyuv/source/planar_functions.cc @@ -50,6 +50,7 @@ void CopyPlane(const uint8* src_y, if (src_y == dst_y && src_stride_y == dst_stride_y) { return; } + #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; @@ -70,11 +71,6 @@ void CopyPlane(const uint8* src_y, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Copy plane for (y = 0; y < height; ++y) { @@ -116,11 +112,6 @@ void CopyPlane_16(const uint16* src_y, CopyRow = CopyRow_16_NEON; } #endif -#if defined(HAS_COPYROW_16_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_16_MIPS; - } -#endif // Copy plane for (y = 0; y < height; ++y) { @@ -130,6 +121,106 @@ void CopyPlane_16(const uint16* src_y, } } +// Convert a plane of 16 bit data to 8 bit +LIBYUV_API +void Convert16To8Plane(const uint16* src_y, + int src_stride_y, + uint8* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height) { + int y; + void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale, + int width) = Convert16To8Row_C; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + // Coalesce rows. 
+ if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_CONVERT16TO8ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Convert16To8Row = Convert16To8Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + Convert16To8Row = Convert16To8Row_SSSE3; + } + } +#endif +#if defined(HAS_CONVERT16TO8ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Convert16To8Row = Convert16To8Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + Convert16To8Row = Convert16To8Row_AVX2; + } + } +#endif + + // Convert plane + for (y = 0; y < height; ++y) { + Convert16To8Row(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + +// Convert a plane of 8 bit data to 16 bit +LIBYUV_API +void Convert8To16Plane(const uint8* src_y, + int src_stride_y, + uint16* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height) { + int y; + void (*Convert8To16Row)(const uint8* src_y, uint16* dst_y, int scale, + int width) = Convert8To16Row_C; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + // Coalesce rows. + if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_CONVERT8TO16ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Convert8To16Row = Convert8To16Row_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + Convert8To16Row = Convert8To16Row_SSE2; + } + } +#endif +#if defined(HAS_CONVERT8TO16ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Convert8To16Row = Convert8To16Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + Convert8To16Row = Convert8To16Row_AVX2; + } + } +#endif + + // Convert plane + for (y = 0; y < height; ++y) { + Convert8To16Row(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + // Copy I422. LIBYUV_API int I422Copy(const uint8* src_y, @@ -311,16 +402,6 @@ void SplitUVPlane(const uint8* src_uv, } } #endif -#if defined(HAS_SPLITUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_u, 4) && - IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_v, 4) && - IS_ALIGNED(dst_stride_v, 4)) { - SplitUVRow = SplitUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - SplitUVRow = SplitUVRow_DSPR2; - } - } -#endif #if defined(HAS_SPLITUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SplitUVRow = SplitUVRow_Any_MSA; @@ -562,14 +643,6 @@ void MirrorPlane(const uint8* src_y, } } #endif -// TODO(fbarchard): Mirror on mips handle unaligned memory. 
-#if defined(HAS_MIRRORROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_y, 4) && - IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(dst_y, 4) && - IS_ALIGNED(dst_stride_y, 4)) { - MirrorRow = MirrorRow_DSPR2; - } -#endif #if defined(HAS_MIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MirrorRow = MirrorRow_Any_MSA; @@ -1473,15 +1546,6 @@ static int I422ToRGBAMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TORGBAROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { - I422ToRGBARow = I422ToRGBARow_DSPR2; - } -#endif #if defined(HAS_I422TORGBAROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGBARow = I422ToRGBARow_Any_MSA; @@ -2534,14 +2598,6 @@ int InterpolatePlane(const uint8* src0, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src0, 4) && - IS_ALIGNED(src_stride0, 4) && IS_ALIGNED(src1, 4) && - IS_ALIGNED(src_stride1, 4) && IS_ALIGNED(dst, 4) && - IS_ALIGNED(dst_stride, 4) && IS_ALIGNED(width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -2641,14 +2697,6 @@ int ARGBShuffle(const uint8* src_bgra, height = 1; src_stride_bgra = dst_stride_argb = 0; } -#if defined(HAS_ARGBSHUFFLEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBShuffleRow = ARGBShuffleRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBShuffleRow = ARGBShuffleRow_SSE2; - } - } -#endif #if defined(HAS_ARGBSHUFFLEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; diff --git a/chromium/third_party/libyuv/source/rotate.cc b/chromium/third_party/libyuv/source/rotate.cc index b16af507185..1f74cd0714a 100644 --- a/chromium/third_party/libyuv/source/rotate.cc +++ b/chromium/third_party/libyuv/source/rotate.cc @@ -57,16 +57,6 @@ void TransposePlane(const uint8* src, } } #endif -#if defined(HAS_TRANSPOSEWX8_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4)) { - TransposeWx8 = TransposeWx8_Fast_DSPR2; - } else { - TransposeWx8 = TransposeWx8_DSPR2; - } - } -#endif #if defined(HAS_TRANSPOSEWX16_MSA) if (TestCpuFlag(kCpuHasMSA)) { TransposeWx16 = TransposeWx16_Any_MSA; @@ -168,14 +158,6 @@ void RotatePlane180(const uint8* src, } } #endif -// TODO(fbarchard): Mirror on mips handle unaligned memory. -#if defined(HAS_MIRRORROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst, 4) && - IS_ALIGNED(dst_stride, 4)) { - MirrorRow = MirrorRow_DSPR2; - } -#endif #if defined(HAS_MIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MirrorRow = MirrorRow_Any_MSA; @@ -204,11 +186,6 @@ void RotatePlane180(const uint8* src, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Odd height will harmlessly mirror the middle row twice. 
for (y = 0; y < half_height; ++y) { @@ -255,12 +232,6 @@ void TransposeUV(const uint8* src, } } #endif -#if defined(HAS_TRANSPOSEUVWX8_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4)) { - TransposeUVWx8 = TransposeUVWx8_DSPR2; - } -#endif #if defined(HAS_TRANSPOSEUVWX16_MSA) if (TestCpuFlag(kCpuHasMSA)) { TransposeUVWx16 = TransposeUVWx16_Any_MSA; @@ -355,12 +326,6 @@ void RotateUV180(const uint8* src, MirrorUVRow = MirrorUVRow_SSSE3; } #endif -#if defined(HAS_MIRRORUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4)) { - MirrorUVRow = MirrorUVRow_DSPR2; - } -#endif #if defined(HAS_MIRRORUVROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) { MirrorUVRow = MirrorUVRow_MSA; diff --git a/chromium/third_party/libyuv/source/rotate_any.cc b/chromium/third_party/libyuv/source/rotate_any.cc index 562096b926c..eb4f7418660 100644 --- a/chromium/third_party/libyuv/source/rotate_any.cc +++ b/chromium/third_party/libyuv/source/rotate_any.cc @@ -38,9 +38,6 @@ TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7) #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15) #endif -#ifdef HAS_TRANSPOSEWX8_DSPR2 -TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7) -#endif #ifdef HAS_TRANSPOSEWX16_MSA TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15) #endif @@ -64,9 +61,6 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) #ifdef HAS_TRANSPOSEUVWX8_SSE2 TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) #endif -#ifdef HAS_TRANSPOSEUVWX8_DSPR2 -TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7) -#endif #ifdef HAS_TRANSPOSEUVWX16_MSA TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7) #endif diff --git a/chromium/third_party/libyuv/source/rotate_argb.cc b/chromium/third_party/libyuv/source/rotate_argb.cc index ede4eafa6c2..f6a2bf69f94 100644 --- a/chromium/third_party/libyuv/source/rotate_argb.cc +++ b/chromium/third_party/libyuv/source/rotate_argb.cc @@ -173,11 +173,6 @@ void ARGBRotate180(const uint8* src, CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Odd height will harmlessly mirror the middle row twice. for (y = 0; y < half_height; ++y) { diff --git a/chromium/third_party/libyuv/source/rotate_dspr2.cc b/chromium/third_party/libyuv/source/rotate_dspr2.cc deleted file mode 100644 index 5d2338deff6..00000000000 --- a/chromium/third_party/libyuv/source/rotate_dspr2.cc +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/rotate_row.h" -#include "libyuv/row.h" - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32) - -void TransposeWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" - // dst + dst_stride word aligned - "1: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" - "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "sw $s0, 0(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "sw $s1, 4(%[dst]) \n" - "bnez %[width], 1b \n" - " addu %[dst], %[dst], %[dst_stride] \n" - "b 2f \n" - // dst + dst_stride unaligned - "11: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" - "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "swr $s0, 0(%[dst]) \n" - "swl $s0, 3(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "swr $s1, 4(%[dst]) \n" - "swl $s1, 7(%[dst]) \n" - "bnez %[width], 11b \n" - "addu %[dst], %[dst], %[dst_stride] \n" - "2: \n" - ".set pop \n" - : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width) - : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1"); -} - -void TransposeWx8_Fast_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width) { - __asm__ __volatile__( - ".set noat \n" - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - - "srl $AT, %[width], 0x2 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" - // dst + dst_stride word aligned - "1: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - - // t0 = | 30 | 20 | 10 | 00 | - // t1 = | 31 | 21 | 11 | 01 | - // t8 = | 32 | 22 | 12 | 02 | - // t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 
\n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, $s0 \n" - "precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - - // t0 = | 34 | 24 | 14 | 04 | - // t1 = | 35 | 25 | 15 | 05 | - // t8 = | 36 | 26 | 16 | 06 | - // t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "sw $s4, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $s6, 0($s0) \n" - "sw $t8, 4($s0) \n" - "sw $s5, 0($s1) \n" - "sw $t1, 4($s1) \n" - "sw $s7, 0($s2) \n" - "sw $t9, 4($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 1b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "b 2f \n" - // dst + dst_stride unaligned - "11: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - - // t0 = | 30 | 20 | 10 | 00 | - // t1 = | 31 | 21 | 11 | 01 | - // t8 = | 32 | 22 | 12 | 02 | - // t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, $s0 \n" - "precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - - // t0 = | 34 | 24 | 14 | 04 | - // t1 = | 35 | 25 | 15 | 05 | - // t8 = | 36 | 26 | 16 | 06 | - // t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "swr $s4, 0(%[dst]) \n" - "swl $s4, 3(%[dst]) \n" - "swr $t0, 4(%[dst]) \n" - "swl $t0, 7(%[dst]) \n" - "swr $s6, 0($s0) \n" - "swl $s6, 3($s0) 
\n" - "swr $t8, 4($s0) \n" - "swl $t8, 7($s0) \n" - "swr $s5, 0($s1) \n" - "swl $s5, 3($s1) \n" - "swr $t1, 4($s1) \n" - "swl $t1, 7($s1) \n" - "swr $s7, 0($s2) \n" - "swl $s7, 3($s2) \n" - "swr $t9, 4($s2) \n" - "swl $t9, 7($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 11b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "2: \n" - ".set pop \n" - ".set at \n" - : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width) - : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1", - "s2", "s3", "s4", "s5", "s6", "s7"); -} - -void TransposeUVWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst_a, - int dst_stride_a, - uint8* dst_b, - int dst_stride_b, - int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "subu $t7, $t9, %[src_stride] \n" - "srl $t1, %[width], 1 \n" - - // check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b - "andi $t0, %[dst_a], 0x3 \n" - "andi $t8, %[dst_b], 0x3 \n" - "or $t0, $t0, $t8 \n" - "andi $t8, %[dst_stride_a], 0x3 \n" - "andi $s5, %[dst_stride_b], 0x3 \n" - "or $t8, $t8, $s5 \n" - "or $t0, $t0, $t8 \n" - "bnez $t0, 11f \n" - " nop \n" - // dst + dst_stride word aligned (both, a & b dst addresses) - "1: \n" - "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu $s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "sw $s3, 0($s5) \n" - "sw $s4, 0($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "sw $s3, 0(%[dst_a]) \n" - "sw $s4, 0(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - "sw $s3, 4($s5) \n" - "sw $s4, 4($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "sw $s3, 4(%[dst_a]) \n" - "sw $s4, 4(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 1b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - "b 2f \n" - " nop \n" - - // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned - "11: \n" - "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu 
$s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "swr $s3, 0($s5) \n" - "swl $s3, 3($s5) \n" - "swr $s4, 0($s6) \n" - "swl $s4, 3($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "swr $s3, 0(%[dst_a]) \n" - "swl $s3, 3(%[dst_a]) \n" - "swr $s4, 0(%[dst_b]) \n" - "swl $s4, 3(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - - "swr $s3, 4($s5) \n" - "swl $s3, 7($s5) \n" - "swr $s4, 4($s6) \n" - "swl $s4, 7($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "swr $s3, 4(%[dst_a]) \n" - "swl $s3, 7(%[dst_a]) \n" - "swr $s4, 4(%[dst_b]) \n" - "swl $s4, 7(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 11b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - - "2: \n" - ".set pop \n" - : [src] "+r"(src), [dst_a] "+r"(dst_a), [dst_b] "+r"(dst_b), - [width] "+r"(width), [src_stride] "+r"(src_stride) - : [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1", - "s2", "s3", "s4", "s5", "s6"); -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/chromium/third_party/libyuv/source/rotate_neon.cc b/chromium/third_party/libyuv/source/rotate_neon.cc index d9bbc78cdab..47ff9b29ef5 100644 --- a/chromium/third_party/libyuv/source/rotate_neon.cc +++ b/chromium/third_party/libyuv/source/rotate_neon.cc @@ -21,8 +21,8 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) -static uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, - 2, 6, 10, 14, 3, 7, 11, 15}; +static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15}; void TransposeWx8_NEON(const uint8* src, int src_stride, @@ -189,8 +189,8 @@ void TransposeWx8_NEON(const uint8* src, : "memory", "cc", "q0", "q1", "q2", "q3"); } -static uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11, - 4, 12, 5, 13, 6, 14, 7, 15}; +static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11, + 4, 12, 5, 13, 6, 14, 7, 15}; void TransposeUVWx8_NEON(const uint8* src, int src_stride, diff --git a/chromium/third_party/libyuv/source/rotate_neon64.cc b/chromium/third_party/libyuv/source/rotate_neon64.cc index 59db7f9f3da..93c30546bd2 100644 --- a/chromium/third_party/libyuv/source/rotate_neon64.cc +++ 
b/chromium/third_party/libyuv/source/rotate_neon64.cc @@ -21,8 +21,8 @@ extern "C" { // This module is for GCC Neon armv8 64 bit. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) -static uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, - 2, 6, 10, 14, 3, 7, 11, 15}; +static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15}; void TransposeWx8_NEON(const uint8* src, int src_stride, @@ -196,7 +196,7 @@ void TransposeWx8_NEON(const uint8* src, "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } -static uint8 kVTbl4x4TransposeDi[32] = { +static const uint8 kVTbl4x4TransposeDi[32] = { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54, 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55}; diff --git a/chromium/third_party/libyuv/source/row_any.cc b/chromium/third_party/libyuv/source/row_any.cc index 8b31ac9fccc..7e557d42109 100644 --- a/chromium/third_party/libyuv/source/row_any.cc +++ b/chromium/third_party/libyuv/source/row_any.cc @@ -183,12 +183,6 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) #endif -#ifdef HAS_I422TOARGBROW_DSPR2 -ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7) -ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7) -ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7) -ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7) -#endif #ifdef HAS_I422TOARGBROW_MSA ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7) @@ -200,6 +194,32 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7) #endif #undef ANY31C +// 64 byte per row for future AVX2 +// Any 3 planes of 16 bit to 1 with yuvconstants +// TODO(fbarchard): consider +#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ + void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(T temp[16 * 3]); \ + SIMD_ALIGNED(uint8 out[64]); \ + memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r * SBPP); \ + memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ + } + +#ifdef HAS_I210TOARGBROW_SSSE3 +ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16, 2, 4, 7) +#endif +#undef ANY31CT + // Any 2 planes to 1. 
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \ @@ -326,9 +346,6 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) #ifdef HAS_NV12TOARGBROW_NEON ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif -#ifdef HAS_NV12TOARGBROW_DSPR2 -ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7) -#endif #ifdef HAS_NV12TOARGBROW_MSA ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7) #endif @@ -396,6 +413,12 @@ ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) #endif +#if defined(HAS_ARGBTOAR30ROW_SSSE3) +ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3) +#endif +#if defined(HAS_ARGBTOAR30ROW_AVX2) +ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7) +#endif #if defined(HAS_J400TOARGBROW_SSE2) ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7) #endif @@ -572,33 +595,6 @@ ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) #ifdef HAS_ARGB4444TOARGBROW_NEON ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) #endif -#ifdef HAS_RGB24TOARGBROW_DSPR2 -ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7) -#endif -#ifdef HAS_RAWTOARGBROW_DSPR2 -ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7) -#endif -#ifdef HAS_RGB565TOARGBROW_DSPR2 -ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7) -#endif -#ifdef HAS_ARGB1555TOARGBROW_DSPR2 -ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7) -#endif -#ifdef HAS_ARGB4444TOARGBROW_DSPR2 -ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7) -#endif -#ifdef HAS_BGRATOYROW_DSPR2 -ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7) -#endif -#ifdef HAS_ARGBTOYROW_DSPR2 -ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7) -#endif -#ifdef HAS_ABGRTOYROW_DSPR2 -ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7) -#endif -#ifdef HAS_RGBATOYROW_DSPR2 -ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7) -#endif #ifdef HAS_ARGB4444TOARGBROW_MSA ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) #endif @@ -711,9 +707,6 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA, 2, 7) #endif -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3) -#endif #ifdef HAS_ARGBSHUFFLEROW_SSSE3 ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7) #endif @@ -729,10 +722,46 @@ ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8*, 4, 4, 7) #undef ANY11P // Any 1 to 1 with parameter and shorts. BPP measures in shorts. 
+#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \ + void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \ + SIMD_ALIGNED(STYPE temp[32]); \ + SIMD_ALIGNED(DTYPE out[32]); \ + memset(temp, 0, 32 * SBPP); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, scale, n); \ + } \ + memcpy(temp, src_ptr + n, r * SBPP); \ + ANY_SIMD(temp, out, scale, MASK + 1); \ + memcpy(dst_ptr + n, out, r * BPP); \ + } + +#ifdef HAS_CONVERT16TO8ROW_SSSE3 +ANY11C(Convert16To8Row_Any_SSSE3, + Convert16To8Row_SSSE3, + 2, + 1, + uint16, + uint8, + 15) +#endif +#ifdef HAS_CONVERT16TO8ROW_AVX2 +ANY11C(Convert16To8Row_Any_AVX2, Convert16To8Row_AVX2, 2, 1, uint16, uint8, 31) +#endif +#ifdef HAS_CONVERT8TO16ROW_SSE2 +ANY11C(Convert8To16Row_Any_SSE2, Convert8To16Row_SSE2, 1, 2, uint8, uint16, 15) +#endif +#ifdef HAS_CONVERT8TO16ROW_AVX2 +ANY11C(Convert8To16Row_Any_AVX2, Convert8To16Row_AVX2, 1, 2, uint8, uint16, 31) +#endif +#undef ANY11C + +// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts. #define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, T param, int width) { \ - SIMD_ALIGNED(uint16 temp[16 * 2]); \ - memset(temp, 0, 32); /* for msan */ \ + SIMD_ALIGNED(uint16 temp[32 * 2]); \ + memset(temp, 0, 64); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ @@ -821,9 +850,6 @@ ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) #ifdef HAS_INTERPOLATEROW_NEON ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) #endif -#ifdef HAS_INTERPOLATEROW_DSPR2 -ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3) -#endif #ifdef HAS_INTERPOLATEROW_MSA ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) #endif @@ -922,9 +948,6 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) #ifdef HAS_SPLITUVROW_NEON ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) #endif -#ifdef HAS_SPLITUVROW_DSPR2 -ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15) -#endif #ifdef HAS_SPLITUVROW_MSA ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) #endif @@ -1086,18 +1109,6 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) #ifdef HAS_UYVYTOUVROW_NEON ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) #endif -#ifdef HAS_BGRATOUVROW_DSPR2 -ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15) -#endif -#ifdef HAS_ABGRTOUVROW_DSPR2 -ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15) -#endif -#ifdef HAS_RGBATOUVROW_DSPR2 -ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15) -#endif -#ifdef HAS_ARGBTOUVROW_DSPR2 -ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15) -#endif #ifdef HAS_YUY2TOUVROW_MSA ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) #endif diff --git a/chromium/third_party/libyuv/source/row_common.cc b/chromium/third_party/libyuv/source/row_common.cc index 6ffc4febbf6..a0ca90b8ab8 100644 --- a/chromium/third_party/libyuv/source/row_common.cc +++ b/chromium/third_party/libyuv/source/row_common.cc @@ -165,6 +165,23 @@ void ARGB4444ToARGBRow_C(const uint8* src_argb4444, } } +void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width) { + int x; + for (x = 0; x < width; ++x) { + uint32 ar30 = *(uint32*)src_ar30; + uint32 b = ar30 & 0x3ff; + uint32 g = (ar30 >> 10) & 0x3ff; + uint32 r = (ar30 >> 20) & 0x3ff; + uint32 a = (ar30 >> 30) & 0x3; + dst_argb[0] = b >> 2; + dst_argb[1] = g >> 2; + dst_argb[2] = r >> 2; + 
dst_argb[3] = a * 0x55; + dst_argb += 4; + src_ar30 += 4; + } +} + void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { @@ -301,6 +318,19 @@ void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { } } +void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { + int x; + for (x = 0; x < width; ++x) { + uint32 b0 = (src_argb[0] >> 6) | ((uint32)(src_argb[0]) << 2); + uint32 g0 = (src_argb[1] >> 6) | ((uint32)(src_argb[1]) << 2); + uint32 r0 = (src_argb[2] >> 6) | ((uint32)(src_argb[2]) << 2); + uint32 a0 = (src_argb[3] >> 6); + *(uint32*)(dst_rgb) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); + dst_rgb += 4; + src_argb += 4; + } +} + static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; } @@ -1265,6 +1295,51 @@ static __inline void YuvPixel(uint8 y, *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6); } +// C reference code that mimics the YUV 10 bit assembly. +static __inline void YuvPixel10(uint16 y, + uint16 u, + uint16 v, + uint8* b, + uint8* g, + uint8* r, + const struct YuvConstants* yuvconstants) { +#if defined(__aarch64__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = -yuvconstants->kUVToRB[1]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#elif defined(__arm__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[4]; + int vr = -yuvconstants->kUVToRB[4]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#else + int ub = yuvconstants->kUVToB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = yuvconstants->kUVToR[1]; + int bb = yuvconstants->kUVBiasB[0]; + int bg = yuvconstants->kUVBiasG[0]; + int br = yuvconstants->kUVBiasR[0]; + int yg = yuvconstants->kYToRgb[0]; +#endif + + uint32 y1 = (uint32)((y << 6) * yg) >> 16; + u = clamp255(u >> 2); + v = clamp255(v >> 2); + *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6); + *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6); + *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6); +} + // Y contribution to R,G,B. Scale and bias. #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ @@ -1358,6 +1433,33 @@ void I422ToARGBRow_C(const uint8* src_y, } } +// 10 bit YUV to ARGB +void I210ToARGBRow_C(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, + uint8* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + rgb_buf[3] = 255; + YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); + rgb_buf[7] = 255; + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels. 
+ } + if (width & 1) { + YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + rgb_buf[3] = 255; + } +} + void I422AlphaToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1798,6 +1900,11 @@ void MergeRGBRow_C(const uint8* src_r, } } +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 128 = 9 bits +// 64 = 10 bits +// 16 = 12 bits +// 1 = 16 bits void MergeUVRow_16_C(const uint16* src_u, const uint16* src_v, uint16* dst_uv, @@ -1827,6 +1934,34 @@ void MultiplyRow_16_C(const uint16* src_y, } } +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +void Convert16To8Row_C(const uint16* src_y, + uint8* dst_y, + int scale, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_y[x] = clamp255((src_y[x] * scale) >> 16); + } +} + +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 1024 = 10 bits +void Convert8To16Row_C(const uint8* src_y, + uint16* dst_y, + int scale, + int width) { + int x; + scale *= 0x0101; // replicates the byte. + for (x = 0; x < width; ++x) { + dst_y[x] = (src_y[x] * scale) >> 16; + } +} + void CopyRow_C(const uint8* src, uint8* dst, int count) { memcpy(dst, src, count); } diff --git a/chromium/third_party/libyuv/source/row_dspr2.cc b/chromium/third_party/libyuv/source/row_dspr2.cc deleted file mode 100644 index 11f78e0d2f1..00000000000 --- a/chromium/third_party/libyuv/source/row_dspr2.cc +++ /dev/null @@ -1,1721 +0,0 @@ -/* - * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) - -#ifdef HAS_COPYROW_MIPS -void CopyRow_MIPS(const uint8* src, uint8* dst, int count) { - __asm__ __volatile__( - ".set noreorder \n" - ".set noat \n" - "slti $at, %[count], 8 \n" - "bne $at ,$zero, $last8 \n" - "xor $t8, %[src], %[dst] \n" - "andi $t8, $t8, 0x3 \n" - - "bne $t8, $zero, unaligned \n" - "negu $a3, %[dst] \n" - // make dst/src aligned - "andi $a3, $a3, 0x3 \n" - "beq $a3, $zero, $chk16w \n" - // word-aligned now count is the remining bytes count - "subu %[count], %[count], $a3 \n" - - "lwr $t8, 0(%[src]) \n" - "addu %[src], %[src], $a3 \n" - "swr $t8, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - - // Now the dst/src are mutually word-aligned with word-aligned addresses - "$chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? 
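A minimal standalone sketch of the scale convention used by the Convert16To8Row_C and Convert8To16Row_C rows added above; the small main() driver and its constants are illustrative assumptions, not part of the patch. With scale 16384 the 10-bit maximum converts to the 8-bit maximum, and scale 1024 (byte-replicated inside Convert8To16Row_C) converts it back:

#include <stdio.h>

int main(void) {
  /* 16 -> 8 bits: dst = clamp255((src * scale) >> 16), scale 16384 for 10-bit input. */
  unsigned short y10 = 1023;                                  /* 10-bit white */
  unsigned char y8 = (unsigned char)((y10 * 16384) >> 16);    /* 255 */

  /* 8 -> 16 bits: dst = (src * (scale * 0x0101)) >> 16, scale 1024 for 10-bit output. */
  unsigned short y10_back = (unsigned short)((y8 * (1024 * 0x0101)) >> 16); /* 1023 */

  printf("%u %u\n", y8, y10_back);                            /* prints 255 1023 */
  return 0;
}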
- // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, chk8w \n" - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" - // t0 is the "past the end" address - - // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be - // past - // the "t0-32" address - // This means: for x=128 the last "safe" a1 address is "t0-160" - // Alternatively, for x=64 the last "safe" a1 address is "t0-96" - // we will use "pref 30,128(a1)", so "t0-160" is the limit - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line of src - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // In case the a1 > t9 don't use "pref 30" at all - "sltu $v1, $t9, %[dst] \n" - "bgtz $v1, $loop16w \n" - "nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$loop16w: \n" - "pref 0, 96(%[src]) \n" - "lw $t0, 0(%[src]) \n" - "bgtz $v1, $skip_pref30_96 \n" // skip - "lw $t1, 4(%[src]) \n" - "pref 30, 96(%[dst]) \n" // continue - "$skip_pref30_96: \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lw $t0, 32(%[src]) \n" - "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1) - "lw $t1, 36(%[src]) \n" - "pref 30, 128(%[dst]) \n" // set dest, addr 128 - "$skip_pref30_128: \n" - "lw $t2, 40(%[src]) \n" - "lw $t3, 44(%[src]) \n" - "lw $t4, 48(%[src]) \n" - "lw $t5, 52(%[src]) \n" - "lw $t6, 56(%[src]) \n" - "lw $t7, 60(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst], %[dst], 64 \n" // adding 64 to dest - "sltu $v1, $t9, %[dst] \n" - "bne %[dst], $a3, $loop16w \n" - " addiu %[src], %[src], 64 \n" // adding 64 to src - "move %[count], $t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? 
- // the t8 is the reminder count past 32-bytes - "beq %[count], $t8, chk1w \n" - // count=t8,no 32-byte chunk - " nop \n" - - "lw $t0, 0(%[src]) \n" - "lw $t1, 4(%[src]) \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, $last8 \n" - " subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - // copying in words (4-byte chunks) - "$wordCopy_loop: \n" - "lw $t3, 0(%[src]) \n" - // the first t3 may be equal t0 ... optimize? - "addiu %[src], %[src],4 \n" - "addiu %[dst], %[dst],4 \n" - "bne %[dst], $a3,$wordCopy_loop \n" - " sw $t3, -4(%[dst]) \n" - - // For the last (<8) bytes - "$last8: \n" - "blez %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 -last dst address - "$last8loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst], $a3, $last8loop \n" - " sb $v1, -1(%[dst]) \n" - - "leave: \n" - " j $ra \n" - " nop \n" - - // - // UNALIGNED case - // - - "unaligned: \n" - // got here with a3="negu a1" - "andi $a3, $a3, 0x3 \n" // a1 is word aligned? - "beqz $a3, $ua_chk16w \n" - " subu %[count], %[count], $a3 \n" - // bytes left after initial a3 bytes - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3 - "swr $v1, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - // below the dst will be word aligned (NOTE1) - "$ua_chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? 
- // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, ua_chk8w \n" - // if a2==t8, no 64-byte chunks - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" // t0 "past the end" - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line addr 32 - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // safe, as we have at least 64 bytes ahead - // In case the a1 > t9 don't use "pref 30" at all - "sltu $v1, $t9, %[dst] \n" - "bgtz $v1, $ua_loop16w \n" - // skip "pref 30,64(a1)" for too short arrays - " nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$ua_loop16w: \n" - "pref 0, 96(%[src]) \n" - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 4(%[src]) \n" - "bgtz $v1, $ua_skip_pref30_96 \n" - " lwl $t1, 7(%[src]) \n" - "pref 30, 96(%[dst]) \n" - // continue setting up the dest, addr 96 - "$ua_skip_pref30_96: \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lwr $t0, 32(%[src]) \n" - "lwl $t0, 35(%[src]) \n" - "lwr $t1, 36(%[src]) \n" - "bgtz $v1, ua_skip_pref30_128 \n" - " lwl $t1, 39(%[src]) \n" - "pref 30, 128(%[dst]) \n" - // continue setting up the dest, addr 128 - "ua_skip_pref30_128: \n" - - "lwr $t2, 40(%[src]) \n" - "lwl $t2, 43(%[src]) \n" - "lwr $t3, 44(%[src]) \n" - "lwl $t3, 47(%[src]) \n" - "lwr $t4, 48(%[src]) \n" - "lwl $t4, 51(%[src]) \n" - "lwr $t5, 52(%[src]) \n" - "lwl $t5, 55(%[src]) \n" - "lwr $t6, 56(%[src]) \n" - "lwl $t6, 59(%[src]) \n" - "lwr $t7, 60(%[src]) \n" - "lwl $t7, 63(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst],%[dst],64 \n" // adding 64 to dest - "sltu $v1,$t9,%[dst] \n" - "bne %[dst],$a3,$ua_loop16w \n" - " addiu %[src],%[src],64 \n" // adding 64 to src - "move %[count],$t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "ua_chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? 
- // the t8 is the reminder count - "beq %[count], $t8, $ua_chk1w \n" - // when count==t8, no 32-byte chunk - - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 4(%[src]) \n" - "lwl $t1, 7(%[src]) \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "$ua_chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, ua_smallCopy \n" - "subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - - // copying in words (4-byte chunks) - "$ua_wordCopy_loop: \n" - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addiu %[src], %[src], 4 \n" - "addiu %[dst], %[dst], 4 \n" - // note: dst=a1 is word aligned here, see NOTE1 - "bne %[dst], $a3, $ua_wordCopy_loop \n" - " sw $v1,-4(%[dst]) \n" - - // Now less than 4 bytes (value in count) left to copy - "ua_smallCopy: \n" - "beqz %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 = last dst address - "$ua_smallCopy_loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst],$a3,$ua_smallCopy_loop \n" - " sb $v1, -1(%[dst]) \n" - - "j $ra \n" - " nop \n" - ".set at \n" - ".set reorder \n" - : [dst] "+r"(dst), [src] "+r"(src) - : [count] "r"(count) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "a3", "v1", - "at"); -} -#endif // HAS_COPYROW_MIPS - -// DSPR2 functions -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32) && \ - (__mips_isa_rev < 6) - -void SplitUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "srl $t4, %[width], 4 \n" // multiplies of 16 - "blez $t4, 2f \n" - " andi %[width], %[width], 0xf \n" // residual - - "1: \n" - "addiu $t4, $t4, -1 \n" - "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0 - "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2 - "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4 - "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6 - "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8 - "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | - // U10 - "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | - // U12 - "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | - // U14 - "addiu %[src_uv], %[src_uv], 32 \n" - "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0 - "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0 - "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4 - "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4 - "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8 - "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8 - "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | - // V12 - "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | - // U12 - "sw $t9, 0(%[dst_v]) \n" - "sw $t0, 0(%[dst_u]) \n" - "sw $t1, 4(%[dst_v]) \n" - "sw $t2, 4(%[dst_u]) \n" - "sw $t3, 8(%[dst_v]) \n" - "sw $t5, 8(%[dst_u]) \n" - "sw $t6, 
12(%[dst_v]) \n" - "sw $t7, 12(%[dst_u]) \n" - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz $t4, 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - - "beqz %[width], 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, 0(%[src_uv]) \n" - "lbu $t1, 1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], 2 \n" - "addiu %[width], %[width], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[width], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r"(src_uv), [width] "+r"(width), [dst_u] "+r"(dst_u), - [dst_v] "+r"(dst_v) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t4, %[width], 4 \n" // multiplies of 16 - "andi $t5, %[width], 0xf \n" - "blez $t4, 2f \n" - " addu %[src], %[src], %[width] \n" // src += width - - "1: \n" - "lw $t0, -16(%[src]) \n" // |3|2|1|0| - "lw $t1, -12(%[src]) \n" // |7|6|5|4| - "lw $t2, -8(%[src]) \n" // |11|10|9|8| - "lw $t3, -4(%[src]) \n" // |15|14|13|12| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t1, $t1 \n" // |6|7|4|5| - "wsbh $t2, $t2 \n" // |10|11|8|9| - "wsbh $t3, $t3 \n" // |14|15|12|13| - "rotr $t0, $t0, 16 \n" // |0|1|2|3| - "rotr $t1, $t1, 16 \n" // |4|5|6|7| - "rotr $t2, $t2, 16 \n" // |8|9|10|11| - "rotr $t3, $t3, 16 \n" // |12|13|14|15| - "addiu %[src], %[src], -16 \n" - "addiu $t4, $t4, -1 \n" - "sw $t3, 0(%[dst]) \n" // |15|14|13|12| - "sw $t2, 4(%[dst]) \n" // |11|10|9|8| - "sw $t1, 8(%[dst]) \n" // |7|6|5|4| - "sw $t0, 12(%[dst]) \n" // |3|2|1|0| - "bgtz $t4, 1b \n" - " addiu %[dst], %[dst], 16 \n" - "beqz $t5, 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, -1(%[src]) \n" - "addiu $t5, $t5, -1 \n" - "addiu %[src], %[src], -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgez $t5, 2b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src] "+r"(src), [dst] "+r"(dst) - : [width] "r"(width) - : "t0", "t1", "t2", "t3", "t4", "t5"); -} - -void MirrorUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width) { - int x; - int y; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "addu $t4, %[width], %[width] \n" - "srl %[x], %[width], 4 \n" - "andi %[y], %[width], 0xf \n" - "blez %[x], 2f \n" - " addu %[src_uv], %[src_uv], $t4 \n" - - "1: \n" - "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0| - "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4| - "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8| - "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12| - "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16| - "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20| - "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24| - "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28| - - "rotr $t0, $t0, 16 \n" // |1|0|3|2| - "rotr $t1, $t1, 16 \n" // |5|4|7|6| - "rotr $t2, $t2, 16 \n" // |9|8|11|10| - "rotr $t3, $t3, 16 \n" // |13|12|15|14| - "rotr $t4, $t4, 16 \n" // |17|16|19|18| - "rotr $t6, $t6, 16 \n" // |21|20|23|22| - "rotr $t7, $t7, 16 \n" // |25|24|27|26| - "rotr $t8, $t8, 16 \n" // |29|28|31|30| - "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6| - "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7| - "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14| - "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15| - "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22| - "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23| - "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30| - "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31| - "addiu %[src_uv], %[src_uv], -32 \n" - "addiu %[x], %[x], -1 \n" - "swr $t4, 0(%[dst_u]) 
\n" - "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24| - "swr $t6, 0(%[dst_v]) \n" - "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25| - "swr $t2, 4(%[dst_u]) \n" - "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16| - "swr $t3, 4(%[dst_v]) \n" - "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17| - "swr $t0, 8(%[dst_u]) \n" - "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8| - "swr $t1, 8(%[dst_v]) \n" - "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9| - "swr $t9, 12(%[dst_u]) \n" - "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0| - "swr $t5, 12(%[dst_v]) \n" - "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1| - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz %[x], 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - "beqz %[y], 3f \n" - " nop \n" - "b 2f \n" - " nop \n" - - "2: \n" - "lbu $t0, -2(%[src_uv]) \n" - "lbu $t1, -1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], -2 \n" - "addiu %[y], %[y], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[y], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r"(src_uv), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v), - [x] "=&r"(x), [y] "=&r"(y) - : [width] "r"(width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t7", "t8", "t9"); -} - -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_yg; - uint32 tmp_mask = 0x7fff7fff; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_u]) \n" - "lbu %[tmp_t3], 0(%[src_v]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph 
%[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t9], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t9], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "sw %[tmp_t8], 0(%[rgb_buf]) \n" - "sw %[tmp_t7], 4(%[rgb_buf]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), - [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [yg] "r"(yg), - [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), - [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), - [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask)); - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 4 pixels. - } -} - -// Bilinear filter 8x2 -> 8x1 -void InterpolateRow_DSPR2(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride, - int dst_width, - int source_y_fraction) { - int y0_fraction = 256 - source_y_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "replv.ph $t0, %[y0_fraction] \n" - "replv.ph $t1, %[source_y_fraction] \n" - - "1: \n" - "lw $t2, 0(%[src_ptr]) \n" - "lw $t3, 0(%[src_ptr1]) \n" - "lw $t4, 4(%[src_ptr]) \n" - "lw $t5, 4(%[src_ptr1]) \n" - "muleu_s.ph.qbl $t6, $t2, $t0 \n" - "muleu_s.ph.qbr $t7, $t2, $t0 \n" - "muleu_s.ph.qbl $t8, $t3, $t1 \n" - "muleu_s.ph.qbr $t9, $t3, $t1 \n" - "muleu_s.ph.qbl $t2, $t4, $t0 \n" - "muleu_s.ph.qbr $t3, $t4, $t0 \n" - "muleu_s.ph.qbl $t4, $t5, $t1 \n" - "muleu_s.ph.qbr $t5, $t5, $t1 \n" - "addq.ph $t6, $t6, $t8 \n" - "addq.ph $t7, $t7, $t9 \n" - "addq.ph $t2, $t2, $t4 \n" - "addq.ph $t3, $t3, $t5 \n" - "shra_r.ph $t6, $t6, 8 \n" - "shra_r.ph $t7, $t7, 8 \n" - "shra_r.ph $t2, $t2, 8 \n" - "shra_r.ph $t3, $t3, 8 \n" - "precr.qb.ph $t6, $t6, $t7 \n" - "precr.qb.ph $t2, $t2, $t3 \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[src_ptr1], %[src_ptr1], 8 \n" - "addiu %[dst_width], %[dst_width], -8 \n" - "sw $t6, 0(%[dst_ptr]) \n" - "sw $t2, 4(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[dst_ptr], %[dst_ptr], 8 \n" - - ".set pop \n" - : [dst_ptr] "+r"(dst_ptr), [src_ptr1] "+r"(src_ptr1), - [src_ptr] "+r"(src_ptr), [dst_width] "+r"(dst_width) - : [source_y_fraction] "r"(source_y_fraction), - [y0_fraction] "r"(y0_fraction), [src_stride] "r"(src_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} -#include -void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width) { - int x; - uint32 tmp_mask = 0xff; - uint32 tmp_t1; - for (x = 0; x < (width - 1); ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "ulw %[tmp_t1], 0(%[src_rgb24]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_rgb24], %[src_rgb24], 3 \n" - "ins %[tmp_t1], %[tmp_mask], 24, 8 \n" - "sw %[tmp_t1], -4(%[dst_argb]) \n" - ".set pop \n" - : [src_rgb24] "+r"(src_rgb24), [dst_argb] "+r"(dst_argb), - [tmp_t1] "=&r"(tmp_t1) - : [tmp_mask] "r"(tmp_mask) - : "memory"); - } - uint8 b = src_rgb24[0]; - uint8 g = src_rgb24[1]; - uint8 r = src_rgb24[2]; - dst_argb[0] = b; - dst_argb[1] = g; - dst_argb[2] = r; - dst_argb[3] = 
255u; -} - -void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width) { - int x; - uint32 tmp_mask = 0xff; - uint32 tmp_t1, tmp_t2; - for (x = 0; x < (width - 1); ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "ulw %[tmp_t1], 0(%[src_raw]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_raw], %[src_raw], 3 \n" - "srl %[tmp_t2], %[tmp_t1], 16 \n" - "ins %[tmp_t1], %[tmp_mask], 24, 8 \n" - "ins %[tmp_t1], %[tmp_t1], 16, 8 \n" - "ins %[tmp_t1], %[tmp_t2], 0, 8 \n" - "sw %[tmp_t1], -4(%[dst_argb]) \n" - ".set pop \n" - : [src_raw] "+r"(src_raw), [dst_argb] "+r"(dst_argb), - [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2) - : [tmp_mask] "r"(tmp_mask) - : "memory"); - } - uint8 r = src_raw[0]; - uint8 g = src_raw[1]; - uint8 b = src_raw[2]; - dst_argb[0] = b; - dst_argb[1] = g; - dst_argb[2] = r; - dst_argb[3] = 255u; -} - -void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, - uint8* dst_argb, - int width) { - int x; - uint32 tmp_mask = 0xff; - uint32 tmp_t1, tmp_t2, tmp_t3; - for (x = 0; x < width; ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lhu %[tmp_t1], 0(%[src_rgb565]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_rgb565], %[src_rgb565], 2 \n" - "sll %[tmp_t2], %[tmp_t1], 8 \n" - "ins %[tmp_t2], %[tmp_mask], 24,8 \n" - "ins %[tmp_t2], %[tmp_t1], 3, 16 \n" - "ins %[tmp_t2], %[tmp_t1], 5, 11 \n" - "srl %[tmp_t3], %[tmp_t1], 9 \n" - "ins %[tmp_t2], %[tmp_t3], 8, 2 \n" - "ins %[tmp_t2], %[tmp_t1], 3, 5 \n" - "srl %[tmp_t3], %[tmp_t1], 2 \n" - "ins %[tmp_t2], %[tmp_t3], 0, 3 \n" - "sw %[tmp_t2], -4(%[dst_argb]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [src_rgb565] "+r"(src_rgb565), - [dst_argb] "+r"(dst_argb) - : [tmp_mask] "r"(tmp_mask)); - } -} - -void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555, - uint8* dst_argb, - int width) { - int x; - uint32 tmp_t1, tmp_t2, tmp_t3; - for (x = 0; x < width; ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lh %[tmp_t1], 0(%[src_argb1555]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_argb1555], %[src_argb1555], 2 \n" - "sll %[tmp_t2], %[tmp_t1], 9 \n" - "ins %[tmp_t2], %[tmp_t1], 4, 15 \n" - "ins %[tmp_t2], %[tmp_t1], 6, 10 \n" - "srl %[tmp_t3], %[tmp_t1], 7 \n" - "ins %[tmp_t2], %[tmp_t3], 8, 3 \n" - "ins %[tmp_t2], %[tmp_t1], 3, 5 \n" - "srl %[tmp_t3], %[tmp_t1], 2 \n" - "ins %[tmp_t2], %[tmp_t3], 0, 3 \n" - "sw %[tmp_t2], -4(%[dst_argb]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [src_argb1555] "+r"(src_argb1555), - [dst_argb] "+r"(dst_argb) - :); - } -} - -void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444, - uint8* dst_argb, - int width) { - int x; - uint32 tmp_t1; - for (x = 0; x < width; ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lh %[tmp_t1], 0(%[src_argb4444]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_argb4444], %[src_argb4444], 2 \n" - "ins %[tmp_t1], %[tmp_t1], 16, 16 \n" - "ins %[tmp_t1], %[tmp_t1], 12, 16 \n" - "ins %[tmp_t1], %[tmp_t1], 8, 12 \n" - "ins %[tmp_t1], %[tmp_t1], 4, 8 \n" - "sw %[tmp_t1], -4(%[dst_argb]) \n" - ".set pop \n" - : [src_argb4444] "+r"(src_argb4444), [dst_argb] "+r"(dst_argb), - [tmp_t1] "=&r"(tmp_t1)); - } -} - -void I444ToARGBRow_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = 
yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_mask = 0x7fff7fff; - uint32 tmp_yg; - - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[y_buf]) \n" - "lbu %[tmp_t1], 1(%[y_buf]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lh %[tmp_t2], 0(%[u_buf]) \n" - "lh %[tmp_t3], 0(%[v_buf]) \n" - "preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "sw %[tmp_t8], 0(%[rgb_buf]) \n" - "sw %[tmp_t7], 4(%[rgb_buf]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [y_buf] "r"(y_buf), [yg] "r"(yg), [u_buf] "r"(u_buf), - [v_buf] "r"(v_buf), [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), - [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), - [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), - [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask)); - y_buf += 2; - u_buf += 2; - v_buf += 2; - rgb_buf += 8; // Advance 1 pixel. 
- } -} - -void I422ToARGB4444Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_yg; - uint32 tmp_mask = 0x7fff7fff; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_u]) \n" - "lbu %[tmp_t3], 0(%[src_v]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "shrl.qb %[tmp_t1], %[tmp_t8], 4 \n" - "shrl.qb %[tmp_t2], %[tmp_t7], 4 \n" - "shrl.ph %[tmp_t8], %[tmp_t1], 4 \n" - "shrl.ph %[tmp_t7], %[tmp_t2], 4 \n" - "or %[tmp_t8], %[tmp_t8], %[tmp_t1] \n" - "or %[tmp_t7], %[tmp_t7], %[tmp_t2] \n" - "precr.qb.ph %[tmp_t8], %[tmp_t7], %[tmp_t8] \n" - "sw %[tmp_t8], 0(%[dst_argb4444]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [dst_argb4444] "r"(dst_argb4444), [yg] "r"(yg), [src_u] "r"(src_u), - [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub), - [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), - 
[tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), - [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask)); - src_y += 2; - src_u += 1; - src_v += 1; - dst_argb4444 += 4; // Advance 2 pixels. - } -} - -void I422ToARGB1555Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_yg; - uint32 tmp_mask = 0x80008000; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_u]) \n" - "lbu %[tmp_t3], 0(%[src_v]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "ins %[tmp_t3], %[tmp_t8], 7, 24 \n" - "ins %[tmp_t3], %[tmp_t8], 10, 16 \n" - "ins %[tmp_t3], %[tmp_t8], 13, 8 \n" - "ins %[tmp_t4], %[tmp_t7], 7, 24 \n" - "ins %[tmp_t4], %[tmp_t7], 10, 16 \n" - "ins %[tmp_t4], %[tmp_t7], 13, 8 \n" - "precrq.ph.w %[tmp_t8], %[tmp_t4], %[tmp_t3] \n" - "or %[tmp_t8], %[tmp_t8], %[tmp_mask]\n" - "sw %[tmp_t8], 0(%[dst_argb1555]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] 
"=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [dst_argb1555] "r"(dst_argb1555), [yg] "r"(yg), [src_u] "r"(src_u), - [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub), - [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), - [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), - [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask)); - src_y += 2; - src_u += 1; - src_v += 1; - dst_argb1555 += 4; // Advance 2 pixels. - } -} - -void NV12ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_uv, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_mask = 0x7fff7fff; - uint32 tmp_yg; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_uv]) \n" - "lbu %[tmp_t3], 1(%[src_uv]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "sw %[tmp_t8], 0(%[rgb_buf]) \n" - "sw %[tmp_t7], 4(%[rgb_buf]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [src_y] "r"(src_y), [src_uv] "r"(src_uv), [yg] 
"r"(yg), - [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), - [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), - [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), [rgb_buf] "r"(rgb_buf), - [tmp_mask] "r"(tmp_mask)); - - src_y += 2; - src_uv += 2; - rgb_buf += 8; // Advance 2 pixels. - } -} - -void BGRAToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffda0000; - int const2 = 0x0070ffb6; - int const3 = 0x00700000; - int const4 = 0xffeeffa2; - int const5 = 0x100; - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_rgb0]) \n" - "lw %[tmp_t2], 4(%[src_rgb0]) \n" - "lw %[tmp_t3], 0(%[src_rgb1]) \n" - "lw %[tmp_t4], 4(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi"); - } -} - -void BGRAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00420000; - int const2 = 0x00190081; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, 
%[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - "extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void ABGRToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffb6ffda; - int const2 = 0x00000070; - int const3 = 0xffa20070; - int const4 = 0x0000ffee; - int const5 = 0x100; - - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_rgb0]) \n" - "lw %[tmp_t2], 4(%[src_rgb0]) \n" - "lw %[tmp_t3], 0(%[src_rgb1]) \n" - "lw %[tmp_t4], 4(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", 
"$ac1lo", "$ac1hi"); - } -} - -void ARGBToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00810019; - int const2 = 0x00000042; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - "extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void ABGRToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00810042; - int const2 = 0x00000019; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - 
"extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void RGBAToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffb60070; - int const2 = 0x0000ffda; - int const3 = 0xffa2ffee; - int const4 = 0x00000070; - int const5 = 0x100; - - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "ulw %[tmp_t1], 0+1(%[src_rgb0]) \n" - "ulw %[tmp_t2], 4+1(%[src_rgb0]) \n" - "ulw %[tmp_t3], 0+1(%[src_rgb1]) \n" - "ulw %[tmp_t4], 4+1(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi"); - } -} - -void RGBAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00420081; - int const2 = 0x00190000; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbl %[tmp_t5], 
%[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbr %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - "extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void ARGBToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffb60070; - int const2 = 0x0000ffda; - int const3 = 0xffa2ffee; - int const4 = 0x00000070; - int const5 = 0x100; - - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_rgb0]) \n" - "lw %[tmp_t2], 4(%[src_rgb0]) \n" - "lw %[tmp_t3], 0(%[src_rgb1]) \n" - "lw %[tmp_t4], 4(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] 
"=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi"); - } -} - -#endif // __mips_dsp_rev >= 2 - -#endif // defined(__mips__) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/chromium/third_party/libyuv/source/row_gcc.cc b/chromium/third_party/libyuv/source/row_gcc.cc index b5c2e65c938..0dc126678e0 100644 --- a/chromium/third_party/libyuv/source/row_gcc.cc +++ b/chromium/third_party/libyuv/source/row_gcc.cc @@ -22,80 +22,80 @@ extern "C" { #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) // Constants for ARGB -static vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0, - 13, 65, 33, 0, 13, 65, 33, 0}; +static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0, + 13, 65, 33, 0, 13, 65, 33, 0}; // JPeg full range. -static vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0, - 15, 75, 38, 0, 15, 75, 38, 0}; +static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0, + 15, 75, 38, 0, 15, 75, 38, 0}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) -static vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0, - 112, -74, -38, 0, 112, -74, -38, 0}; +static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0, + 112, -74, -38, 0, 112, -74, -38, 0}; -static vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0, - 127, -84, -43, 0, 127, -84, -43, 0}; +static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0, + 127, -84, -43, 0, 127, -84, -43, 0}; -static vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0, - -18, -94, 112, 0, -18, -94, 112, 0}; +static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0, + -18, -94, 112, 0, -18, -94, 112, 0}; -static vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, - -20, -107, 127, 0, -20, -107, 127, 0}; +static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, + -20, -107, 127, 0, -20, -107, 127, 0}; // Constants for BGRA -static vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13, - 0, 33, 65, 13, 0, 33, 65, 13}; +static const vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13, + 0, 33, 65, 13, 0, 33, 65, 13}; -static vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112, - 0, -38, -74, 112, 0, -38, -74, 112}; +static const vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112, + 0, -38, -74, 112, 0, -38, -74, 112}; -static vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18, - 0, 112, -94, -18, 0, 112, -94, -18}; +static const vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18, + 0, 112, -94, -18, 0, 112, -94, -18}; // Constants for ABGR -static vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0, - 33, 65, 13, 0, 33, 65, 13, 0}; +static const vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0, + 33, 65, 13, 0, 33, 65, 13, 0}; -static vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0, - -38, -74, 112, 0, -38, -74, 112, 0}; +static const vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0, + -38, -74, 112, 0, -38, -74, 112, 0}; -static vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0, - 112, -94, -18, 0, 112, -94, -18, 0}; +static const vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0, + 112, -94, -18, 0, 112, -94, -18, 0}; // Constants for RGBA. 
-static vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33, - 0, 13, 65, 33, 0, 13, 65, 33}; +static const vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33, + 0, 13, 65, 33, 0, 13, 65, 33}; -static vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38, - 0, 112, -74, -38, 0, 112, -74, -38}; +static const vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38, + 0, 112, -74, -38, 0, 112, -74, -38}; -static vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112, - 0, -18, -94, 112, 0, -18, -94, 112}; +static const vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112, + 0, -18, -94, 112, 0, -18, -94, 112}; -static uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, - 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u}; +static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, + 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u}; // 7 bit fixed point 0.5. -static vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64}; +static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64}; -static uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; +static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; -static uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, - 0x8080u, 0x8080u, 0x8080u, 0x8080u}; +static const uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, + 0x8080u, 0x8080u, 0x8080u, 0x8080u}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) #ifdef HAS_RGB24TOARGBROW_SSSE3 // Shuffle table for converting RGB24 to ARGB. -static uvec8 kShuffleMaskRGB24ToARGB = {0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, - 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u}; +static const uvec8 kShuffleMaskRGB24ToARGB = { + 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u}; // Shuffle table for converting RAW to ARGB. -static uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, - 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u}; +static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, + 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u}; // Shuffle table for converting RAW to RGB24. First 8. static const uvec8 kShuffleMaskRAWToRGB24_0 = { @@ -113,15 +113,15 @@ static const uvec8 kShuffleMaskRAWToRGB24_2 = { 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RGB24. -static uvec8 kShuffleMaskARGBToRGB24 = { +static const uvec8 kShuffleMaskARGBToRGB24 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RAW. -static uvec8 kShuffleMaskARGBToRAW = { +static const uvec8 kShuffleMaskARGBToRAW = { 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 -static uvec8 kShuffleMaskARGBToRGB24_0 = { +static const uvec8 kShuffleMaskARGBToRGB24_0 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u}; // YUY2 shuf 16 Y to 32 Y. @@ -700,6 +700,112 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) { } #endif // HAS_RGB24TOARGBROW_SSSE3 +/* + +ARGBToAR30Row: + +Red Blue +With the 8 bit value in the upper bits of a short, vpmulhuw by (1024+4) will +produce a 10 bit value in the low 10 bits of each 16 bit value. This is whats +wanted for the blue channel. The red needs to be shifted 4 left, so multiply by +(1024+4)*16 for red. 
+ +Alpha Green +Alpha and Green are already in the high bits so vpand can zero out the other +bits, keeping just 2 upper bits of alpha and 8 bit green. The same multiplier +could be used for Green - (1024+4) putting the 10 bit green in the lsb. Alpha +would be a simple multiplier to shift it into position. It wants a gap of 10 +above the green. Green is 10 bits, so there are 6 bits in the low short. 4 +more are needed, so a multiplier of 4 gets the 2 bits into the upper 16 bits, +and then a shift of 4 is a multiply of 16, so (4*16) = 64. Then shift the +result left 10 to position the A and G channels. +*/ + +// Shuffle table for converting RAW to RGB24. Last 8. +static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u, + 128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u}; +static const uint32 kMulRB10 = 1028 * 16 * 65536 + 1028; +static const uint32 kMaskRB10 = 0x3ff003ff; +static const uint32 kMaskAG10 = 0xc000ff00; +static const uint32 kMulAG10 = 64 * 65536 + 1028; + +void ARGBToAR30Row_SSSE3(const uint8* src, uint8* dst, int width) { + asm volatile( + "movdqa %3,%%xmm2 \n" // shuffler for RB + "movd %4,%%xmm3 \n" // multipler for RB + "movd %5,%%xmm4 \n" // mask for R10 B10 + "movd %6,%%xmm5 \n" // mask for AG + "movd %7,%%xmm6 \n" // multipler for AG + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "sub %0,%1 \n" + + "1: \n" + "movdqu (%0),%%xmm0 \n" // fetch 4 ARGB pixels + "movdqa %%xmm0,%%xmm1 \n" + "pshufb %%xmm2,%%xmm1 \n" // R0B0 + "pand %%xmm5,%%xmm0 \n" // A0G0 + "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10 + "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10 + "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10 + "pslld $10,%%xmm0 \n" // A2 x10 G10 x10 + "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10 + "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels + "add $0x10,%0 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleRB30), // %3 + "m"(kMulRB10), // %4 + "m"(kMaskRB10), // %5 + "m"(kMaskAG10), // %6 + "m"(kMulAG10) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); +} + +#ifdef HAS_ARGBTOAR30ROW_AVX2 + +void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB + "vbroadcastss %4,%%ymm3 \n" // multipler for RB + "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10 + "vbroadcastss %6,%%ymm5 \n" // mask for AG + "vbroadcastss %7,%%ymm6 \n" // multipler for AG + "sub %0,%1 \n" + + "1: \n" + "vmovdqu (%0),%%ymm0 \n" // fetch 8 ARGB pixels + "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0 + "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0 + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10 + "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10 + "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10 + "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10 + "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10 + "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels + "add $0x20,%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleRB30), // %3 + "m"(kMulRB10), // %4 + "m"(kMaskRB10), // %5 + "m"(kMaskAG10), // %6 + "m"(kMulAG10) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); +} +#endif + #ifdef HAS_ARGBTOYROW_SSSE3 // Convert 16 ARGB pixels (64 bytes) to 16 Y values. 
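The ARGBToAR30Row block comment above describes the packing in terms of pmulhuw multipliers and masks; per pixel it reduces to the scalar sketch below. This is illustrative only and not part of the patch (the helper name is invented, and plain stdint types stand in for libyuv's uint8/uint32 typedefs): each 8-bit channel is widened to 10 bits by replicating its top two bits, which is exactly what multiplying the high-byte-aligned channel by 1028 and keeping the upper 16 bits produces, and the results are packed as A2 R10 G10 B10.

#include <stdint.h>

// Illustrative scalar restatement of the AR30 packing described above.
static inline uint32_t PackARGBPixelToAR30(uint8_t b, uint8_t g,
                                           uint8_t r, uint8_t a) {
  uint32_t b10 = ((uint32_t)b << 2) | (b >> 6);  // bits 0..9
  uint32_t g10 = ((uint32_t)g << 2) | (g >> 6);  // bits 10..19
  uint32_t r10 = ((uint32_t)r << 2) | (r >> 6);  // bits 20..29
  uint32_t a2 = (uint32_t)a >> 6;                // bits 30..31, 2-bit alpha
  return b10 | (g10 << 10) | (r10 << 20) | (a2 << 30);
}

For example, opaque white (255, 255, 255, 255) packs to 0xFFFFFFFF and opaque black to 0xC0000000.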
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { @@ -1517,6 +1623,22 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, "punpcklbw %%xmm4,%%xmm4 \n" \ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +// Read 4 UV from 422 10 bit, upsample to 8 UV +// TODO(fbarchard): Consider shufb to replace pack/unpack +// TODO(fbarchard): Consider pmulhuw to replace psraw +// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits. +#define READYUV422_10 \ + "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ + MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ + "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ + "punpcklwd %%xmm1,%%xmm0 \n" \ + "psraw $0x2,%%xmm0 \n" \ + "packuswb %%xmm0,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ + "psllw $0x6,%%xmm4 \n" \ + "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" + // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. #define READYUVA422 \ "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ @@ -1756,6 +1878,36 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, ); } +// 10 bit YUV to ARGB +void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf, + const uint16* u_buf, + const uint16* v_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + + LABELALIGN + "1: \n" + READYUV422_10 + YUVTORGB(yuvconstants) + STOREARGB + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", NACL_R14 YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} + #ifdef HAS_I422ALPHATOARGBROW_SSSE3 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, @@ -2469,8 +2621,8 @@ void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) { #ifdef HAS_MIRRORROW_SSSE3 // Shuffle table for reversing the bytes. -static uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, - 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; +static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, + 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { intptr_t temp_width = (intptr_t)(width); @@ -2523,8 +2675,8 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORUVROW_SSSE3 // Shuffle table for reversing the bytes of UV channels. 
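The READYUV422_10 macro and I210ToARGBRow_SSSE3 above feed 10-bit samples into the same YUVTORGB math the 8-bit rows use. In scalar terms, and assuming this reading of the shifts is right (a sketch, not the library's API), the adaptation amounts to:

#include <stdint.h>

// Illustrative only: chroma drops its two low bits so it lands in the 8-bit
// range YUVTORGB expects (psraw $2 + packuswb), and luma is pre-scaled into
// the upper bits of a 16-bit word (psllw $6), approximating the 8-bit path's
// byte duplication (roughly y8 * 257).
static inline uint8_t UV10ToUV8(uint16_t uv10) { return (uint8_t)(uv10 >> 2); }
static inline uint16_t Y10ToY16(uint16_t y10) { return (uint16_t)(y10 << 6); }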
-static uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, - 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; +static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, + 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, @@ -2800,6 +2952,11 @@ void MergeUVRow_16_AVX2(const uint16* src_u, } #endif // HAS_MERGEUVROW_AVX2 +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 128 = 9 bits +// 64 = 10 bits +// 16 = 12 bits +// 1 = 16 bits #ifdef HAS_MULTIPLYROW_16_AVX2 void MultiplyRow_16_AVX2(const uint16* src_y, uint16* dst_y, @@ -2834,38 +2991,183 @@ void MultiplyRow_16_AVX2(const uint16* src_y, } #endif // HAS_MULTIPLYROW_16_AVX2 +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +void Convert16To8Row_SSSE3(const uint16* src_y, + uint8* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "movd %3,%%xmm2 \n" + "punpcklwd %%xmm2,%%xmm2 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + + // 32 pixels per loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "add $0x20,%0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "add $0x10,%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} + +#ifdef HAS_CONVERT16TO8ROW_AVX2 +void Convert16To8Row_AVX2(const uint16* src_y, + uint8* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "vmovd %3,%%xmm2 \n" + "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n" + "vbroadcastss %%xmm2,%%ymm2 \n" + + // 32 pixels per loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "add $0x40,%0 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "add $0x20,%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} +#endif // HAS_CONVERT16TO8ROW_AVX2 + +// Use scale to convert to lsb formats depending how many bits there are: +// 512 = 9 bits +// 1024 = 10 bits +// 4096 = 12 bits +// TODO(fbarchard): reduce to SSE2 +void Convert8To16Row_SSE2(const uint8* src_y, + uint16* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "movd %3,%%xmm2 \n" + "punpcklwd %%xmm2,%%xmm2 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + + // 32 pixels per loop. 
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "add $0x10,%0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "add $0x20,%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} + +#ifdef HAS_CONVERT8TO16ROW_AVX2 +void Convert8To16Row_AVX2(const uint8* src_y, + uint16* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "vmovd %3,%%xmm2 \n" + "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n" + "vbroadcastss %%xmm2,%%ymm2 \n" + + // 32 pixels per loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "add $0x40,%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} +#endif // HAS_CONVERT8TO16ROW_AVX2 + #ifdef HAS_SPLITRGBROW_SSSE3 // Shuffle table for converting RGB to Planar. -static uvec8 kShuffleMaskRGBToR0 = {0u, 3u, 6u, 9u, 12u, 15u, - 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u}; -static uvec8 kShuffleMaskRGBToR1 = {128u, 128u, 128u, 128u, 128u, 128u, - 2u, 5u, 8u, 11u, 14u, 128u, - 128u, 128u, 128u, 128u}; -static uvec8 kShuffleMaskRGBToR2 = {128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 128u, 1u, - 4u, 7u, 10u, 13u}; - -static uvec8 kShuffleMaskRGBToG0 = {1u, 4u, 7u, 10u, 13u, 128u, - 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u}; -static uvec8 kShuffleMaskRGBToG1 = {128u, 128u, 128u, 128u, 128u, 0u, - 3u, 6u, 9u, 12u, 15u, 128u, - 128u, 128u, 128u, 128u}; -static uvec8 kShuffleMaskRGBToG2 = {128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 128u, 2u, - 5u, 8u, 11u, 14u}; - -static uvec8 kShuffleMaskRGBToB0 = {2u, 5u, 8u, 11u, 14u, 128u, - 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u}; -static uvec8 kShuffleMaskRGBToB1 = {128u, 128u, 128u, 128u, 128u, 1u, - 4u, 7u, 10u, 13u, 128u, 128u, - 128u, 128u, 128u, 128u}; -static uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 0u, 3u, - 6u, 9u, 12u, 15u}; +static const uvec8 kShuffleMaskRGBToR0 = {0u, 3u, 6u, 9u, 12u, 15u, + 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToR1 = {128u, 128u, 128u, 128u, 128u, 128u, + 2u, 5u, 8u, 11u, 14u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToR2 = {128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 128u, 1u, + 4u, 7u, 10u, 13u}; + +static const uvec8 kShuffleMaskRGBToG0 = {1u, 4u, 7u, 10u, 13u, 128u, + 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToG1 = {128u, 128u, 128u, 128u, 128u, 0u, + 3u, 6u, 9u, 12u, 15u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToG2 = {128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 128u, 2u, + 5u, 8u, 11u, 14u}; + +static const uvec8 kShuffleMaskRGBToB0 = {2u, 5u, 8u, 11u, 14u, 128u, + 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static 
const uvec8 kShuffleMaskRGBToB1 = {128u, 128u, 128u, 128u, 128u, 1u, + 4u, 7u, 10u, 13u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 0u, 3u, + 6u, 9u, 12u, 15u}; void SplitRGBRow_SSSE3(const uint8* src_rgb, uint8* dst_r, @@ -2933,35 +3235,35 @@ void SplitRGBRow_SSSE3(const uint8* src_rgb, #ifdef HAS_MERGERGBROW_SSSE3 // Shuffle table for converting RGB to Planar. -static uvec8 kShuffleMaskRToRGB0 = {0u, 128u, 128u, 1u, 128u, 128u, - 2u, 128u, 128u, 3u, 128u, 128u, - 4u, 128u, 128u, 5u}; -static uvec8 kShuffleMaskGToRGB0 = {128u, 0u, 128u, 128u, 1u, 128u, - 128u, 2u, 128u, 128u, 3u, 128u, - 128u, 4u, 128u, 128u}; -static uvec8 kShuffleMaskBToRGB0 = {128u, 128u, 0u, 128u, 128u, 1u, - 128u, 128u, 2u, 128u, 128u, 3u, - 128u, 128u, 4u, 128u}; - -static uvec8 kShuffleMaskGToRGB1 = {5u, 128u, 128u, 6u, 128u, 128u, - 7u, 128u, 128u, 8u, 128u, 128u, - 9u, 128u, 128u, 10u}; -static uvec8 kShuffleMaskBToRGB1 = {128u, 5u, 128u, 128u, 6u, 128u, - 128u, 7u, 128u, 128u, 8u, 128u, - 128u, 9u, 128u, 128u}; -static uvec8 kShuffleMaskRToRGB1 = {128u, 128u, 6u, 128u, 128u, 7u, - 128u, 128u, 8u, 128u, 128u, 9u, - 128u, 128u, 10u, 128u}; - -static uvec8 kShuffleMaskBToRGB2 = {10u, 128u, 128u, 11u, 128u, 128u, - 12u, 128u, 128u, 13u, 128u, 128u, - 14u, 128u, 128u, 15u}; -static uvec8 kShuffleMaskRToRGB2 = {128u, 11u, 128u, 128u, 12u, 128u, - 128u, 13u, 128u, 128u, 14u, 128u, - 128u, 15u, 128u, 128u}; -static uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u, - 128u, 128u, 13u, 128u, 128u, 14u, - 128u, 128u, 15u, 128u}; +static const uvec8 kShuffleMaskRToRGB0 = {0u, 128u, 128u, 1u, 128u, 128u, + 2u, 128u, 128u, 3u, 128u, 128u, + 4u, 128u, 128u, 5u}; +static const uvec8 kShuffleMaskGToRGB0 = {128u, 0u, 128u, 128u, 1u, 128u, + 128u, 2u, 128u, 128u, 3u, 128u, + 128u, 4u, 128u, 128u}; +static const uvec8 kShuffleMaskBToRGB0 = {128u, 128u, 0u, 128u, 128u, 1u, + 128u, 128u, 2u, 128u, 128u, 3u, + 128u, 128u, 4u, 128u}; + +static const uvec8 kShuffleMaskGToRGB1 = {5u, 128u, 128u, 6u, 128u, 128u, + 7u, 128u, 128u, 8u, 128u, 128u, + 9u, 128u, 128u, 10u}; +static const uvec8 kShuffleMaskBToRGB1 = {128u, 5u, 128u, 128u, 6u, 128u, + 128u, 7u, 128u, 128u, 8u, 128u, + 128u, 9u, 128u, 128u}; +static const uvec8 kShuffleMaskRToRGB1 = {128u, 128u, 6u, 128u, 128u, 7u, + 128u, 128u, 8u, 128u, 128u, 9u, + 128u, 128u, 10u, 128u}; + +static const uvec8 kShuffleMaskBToRGB2 = {10u, 128u, 128u, 11u, 128u, 128u, + 12u, 128u, 128u, 13u, 128u, 128u, + 14u, 128u, 128u, 15u}; +static const uvec8 kShuffleMaskRToRGB2 = {128u, 11u, 128u, 128u, 12u, 128u, + 128u, 13u, 128u, 128u, 14u, 128u, + 128u, 15u, 128u, 128u}; +static const uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u, + 128u, 128u, 13u, 128u, 128u, 14u, + 128u, 128u, 15u, 128u}; void MergeRGBRow_SSSE3(const uint8* src_r, const uint8* src_g, @@ -3771,8 +4073,8 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy, #ifdef HAS_ARGBBLENDROW_SSSE3 // Shuffle table for isolating alpha. 
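The scale parameters documented above for MultiplyRow_16, Convert16To8Row and Convert8To16Row are easiest to see as plain arithmetic: pmulhuw keeps the high 16 bits of an unsigned 16x16 multiply, so each listed scale is a power-of-two shift. A minimal self-check under that assumption, with a hypothetical scalar helper that is not part of the patch:

#include <assert.h>
#include <stdint.h>

// (v * scale) >> 16 is what pmulhuw computes per 16-bit lane; packuswb then
// saturates to 8 bits, mirrored here by the clamp.
static inline uint8_t Convert16To8_C(uint16_t v, uint16_t scale) {
  uint32_t r = ((uint32_t)v * scale) >> 16;
  return (uint8_t)(r > 255 ? 255 : r);
}

int main(void) {
  assert(Convert16To8_C(1023, 16384) == 255);  // 10-bit max -> 8-bit max
  assert(Convert16To8_C(511, 32768) == 255);   // 9-bit max  -> 8-bit max
  assert(Convert16To8_C(65535, 256) == 255);   // 16-bit max -> 8-bit max
  // Convert8To16Row first duplicates each byte into both halves of a 16-bit
  // word (v * 257) before the pmulhuw, so scale 1024 recovers the full
  // 10-bit range from an 8-bit input.
  assert((((uint32_t)255 * 257) * 1024) >> 16 == 1023);
  // MultiplyRow_16 goes the other way, lsb to msb, by a plain multiply:
  // scale 64 puts a 10-bit sample in the top of a 16-bit word.
  assert(1023 * 64 == 65472);
  return 0;
}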
-static uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, - 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; +static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, + 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; // Blend 8 pixels at a time void ARGBBlendRow_SSSE3(const uint8* src_argb0, @@ -3972,10 +4274,10 @@ void BlendPlaneRow_AVX2(const uint8* src0, #ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha -static uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, - 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u}; -static uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, - 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u}; +static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, + 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u}; +static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, + 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u}; // Attenuate 4 pixels at a time. void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { asm volatile ( @@ -4230,14 +4532,14 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 // Constant for ARGB color to sepia tone -static vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0, - 17, 68, 35, 0, 17, 68, 35, 0}; +static const vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0, + 17, 68, 35, 0, 17, 68, 35, 0}; -static vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0, - 22, 88, 45, 0, 22, 88, 45, 0}; +static const vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0, + 22, 88, 45, 0, 22, 88, 45, 0}; -static vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0, - 24, 98, 50, 0, 24, 98, 50, 0}; +static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0, + 24, 98, 50, 0, 24, 98, 50, 0}; // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { @@ -5431,128 +5733,6 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, } #endif // HAS_ARGBSHUFFLEROW_AVX2 -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
-void ARGBShuffleRow_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width) { - uintptr_t pixel_temp; - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" - "mov " MEMACCESS(4) ",%k2 \n" - "cmp $0x3000102,%k2 \n" - "je 3012f \n" - "cmp $0x10203,%k2 \n" - "je 123f \n" - "cmp $0x30201,%k2 \n" - "je 321f \n" - "cmp $0x2010003,%k2 \n" - "je 2103f \n" - - LABELALIGN - "1: \n" - "movzb " MEMACCESS(4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS(1) " \n" - "movzb " MEMACCESS2(0x1,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x1,1) " \n" - "movzb " MEMACCESS2(0x2,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x2,1) " \n" - "movzb " MEMACCESS2(0x3,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x3,1) " \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "sub $0x1,%3 \n" - "jg 1b \n" - "jmp 99f \n" - - LABELALIGN - "123: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x1b,%%xmm0,%%xmm0 \n" - "pshuflw $0x1b,%%xmm0,%%xmm0 \n" - "pshufhw $0x1b,%%xmm1,%%xmm1 \n" - "pshuflw $0x1b,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 123b \n" - "jmp 99f \n" - - LABELALIGN - "321: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x39,%%xmm0,%%xmm0 \n" - "pshuflw $0x39,%%xmm0,%%xmm0 \n" - "pshufhw $0x39,%%xmm1,%%xmm1 \n" - "pshuflw $0x39,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 321b \n" - "jmp 99f \n" - - LABELALIGN - "2103: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x93,%%xmm0,%%xmm0 \n" - "pshuflw $0x93,%%xmm0,%%xmm0 \n" - "pshufhw $0x93,%%xmm1,%%xmm1 \n" - "pshuflw $0x93,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 2103b \n" - "jmp 99f \n" - - LABELALIGN - "3012: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0xc6,%%xmm0,%%xmm0 \n" - "pshuflw $0xc6,%%xmm0,%%xmm0 \n" - "pshufhw $0xc6,%%xmm1,%%xmm1 \n" - "pshuflw $0xc6,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 3012b \n" - - "99: \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "=&d"(pixel_temp), // %2 - "+r"(width) // %3 - : "r"(shuffler) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); -} -#endif // HAS_ARGBSHUFFLEROW_SSE2 - #ifdef HAS_I422TOYUY2ROW_SSE2 void I422ToYUY2Row_SSE2(const uint8* src_y, const uint8* src_u, @@ -5728,7 +5908,8 @@ static float kScaleBias = 1.9259299444e-34f; void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { scale *= kScaleBias; asm volatile ( - "pshufd $0x0,%3,%%xmm4 \n" + "movd %3,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" 
"sub %0,%1 \n" @@ -5753,11 +5934,7 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 -#if defined(__x86_64__) - : "x"(scale) // %3 -#else : "m"(scale) // %3 -#endif : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5" ); diff --git a/chromium/third_party/libyuv/source/row_win.cc b/chromium/third_party/libyuv/source/row_win.cc index 03a7e9506da..596d7df739e 100644 --- a/chromium/third_party/libyuv/source/row_win.cc +++ b/chromium/third_party/libyuv/source/row_win.cc @@ -5761,123 +5761,6 @@ __declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb, } #endif // HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) void ARGBShuffleRow_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width) { - __asm { - push ebx - push esi - mov eax, [esp + 8 + 4] // src_argb - mov edx, [esp + 8 + 8] // dst_argb - mov esi, [esp + 8 + 12] // shuffler - mov ecx, [esp + 8 + 16] // width - pxor xmm5, xmm5 - - mov ebx, [esi] // shuffler - cmp ebx, 0x03000102 - je shuf_3012 - cmp ebx, 0x00010203 - je shuf_0123 - cmp ebx, 0x00030201 - je shuf_0321 - cmp ebx, 0x02010003 - je shuf_2103 - - // TODO(fbarchard): Use one source pointer and 3 offsets. - shuf_any1: - movzx ebx, byte ptr [esi] - movzx ebx, byte ptr [eax + ebx] - mov [edx], bl - movzx ebx, byte ptr [esi + 1] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 1], bl - movzx ebx, byte ptr [esi + 2] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 2], bl - movzx ebx, byte ptr [esi + 3] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 3], bl - lea eax, [eax + 4] - lea edx, [edx + 4] - sub ecx, 1 - jg shuf_any1 - jmp shuf99 - - shuf_0123: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB - pshuflw xmm0, xmm0, 01Bh - pshufhw xmm1, xmm1, 01Bh - pshuflw xmm1, xmm1, 01Bh - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_0123 - jmp shuf99 - - shuf_0321: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB - pshuflw xmm0, xmm0, 039h - pshufhw xmm1, xmm1, 039h - pshuflw xmm1, xmm1, 039h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_0321 - jmp shuf99 - - shuf_2103: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA - pshuflw xmm0, xmm0, 093h - pshufhw xmm1, xmm1, 093h - pshuflw xmm1, xmm1, 093h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_2103 - jmp shuf99 - - shuf_3012: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB - pshuflw xmm0, xmm0, 0C6h - pshufhw xmm1, xmm1, 0C6h - pshuflw xmm1, xmm1, 0C6h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_3012 - - shuf99: - pop esi - pop ebx - ret - } -} - // YUY2 - Macro-pixel = 2 image pixels // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... 
diff --git a/chromium/third_party/libyuv/source/scale.cc b/chromium/third_party/libyuv/source/scale.cc index 9104acb95fc..6951d8fb95e 100644 --- a/chromium/third_party/libyuv/source/scale.cc +++ b/chromium/third_party/libyuv/source/scale.cc @@ -103,13 +103,6 @@ static void ScalePlaneDown2(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN2_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown2 = filtering ? ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2; - } -#endif #if defined(HAS_SCALEROWDOWN2_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleRowDown2 = @@ -176,14 +169,6 @@ static void ScalePlaneDown2_16(int src_width, : ScaleRowDown2Box_16_SSE2); } #endif -#if defined(HAS_SCALEROWDOWN2_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown2 = - filtering ? ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2; - } -#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -247,13 +232,6 @@ static void ScalePlaneDown4(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN4_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2; - } -#endif #if defined(HAS_SCALEROWDOWN4_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleRowDown4 = @@ -306,14 +284,6 @@ static void ScalePlaneDown4_16(int src_width, filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; } #endif -#if defined(HAS_SCALEROWDOWN4_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown4 = - filtering ? 
ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2; - } -#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -411,19 +381,6 @@ static void ScalePlaneDown34(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN34_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_DSPR2; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -495,19 +452,6 @@ static void ScalePlaneDown34_16(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN34_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -612,19 +556,6 @@ static void ScalePlaneDown38(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN38_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_DSPR2; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2; - } - } -#endif #if defined(HAS_SCALEROWDOWN38_MSA) if (TestCpuFlag(kCpuHasMSA)) { if (!filtering) { @@ -716,19 +647,6 @@ static void ScalePlaneDown38_16(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN38_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); @@ -931,14 +849,6 @@ static void ScalePlaneBox(int src_width, } } #endif -#if defined(HAS_SCALEADDROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ScaleAddRow = ScaleAddRow_Any_DSPR2; - if (IS_ALIGNED(src_width, 16)) { - ScaleAddRow = ScaleAddRow_DSPR2; - } - } -#endif for (j = 0; j < dst_height; ++j) { int boxheight; @@ -1070,14 +980,6 @@ void ScalePlaneBilinearDown(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(src_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -1193,14 +1095,6 @@ void ScalePlaneBilinearDown_16(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if 
(IS_ALIGNED(src_width, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } -#endif #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { @@ -1281,14 +1175,6 @@ void ScalePlaneBilinearUp(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_C; @@ -1432,14 +1318,6 @@ void ScalePlaneBilinearUp_16(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } -#endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_16_C; diff --git a/chromium/third_party/libyuv/source/scale_any.cc b/chromium/third_party/libyuv/source/scale_any.cc index c4d6626ab72..8604c233859 100644 --- a/chromium/third_party/libyuv/source/scale_any.cc +++ b/chromium/third_party/libyuv/source/scale_any.cc @@ -456,9 +456,6 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) #ifdef HAS_SCALEADDROW_MSA SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) #endif -#ifdef HAS_SCALEADDROW_DSPR2 -SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15) -#endif #undef SAANY #ifdef __cplusplus diff --git a/chromium/third_party/libyuv/source/scale_argb.cc b/chromium/third_party/libyuv/source/scale_argb.cc index c3ec7d6bb67..cd4683b37be 100644 --- a/chromium/third_party/libyuv/source/scale_argb.cc +++ b/chromium/third_party/libyuv/source/scale_argb.cc @@ -306,15 +306,6 @@ static void ScaleARGBBilinearDown(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) && - IS_ALIGNED(src_stride, 4)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(clip_src_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -419,12 +410,6 @@ static void ScaleARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -587,15 +572,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; @@ -632,12 +608,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride_argb, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; 
diff --git a/chromium/third_party/libyuv/source/scale_common.cc b/chromium/third_party/libyuv/source/scale_common.cc index fefb027bf76..e060c3cb8d9 100644 --- a/chromium/third_party/libyuv/source/scale_common.cc +++ b/chromium/third_party/libyuv/source/scale_common.cc @@ -1063,16 +1063,6 @@ void ScalePlaneVertical(int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(dst_width_bytes, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -1150,16 +1140,6 @@ void ScalePlaneVertical_16(int src_height, InterpolateRow = InterpolateRow_16_NEON; } } -#endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(dst_width_bytes, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } #endif for (j = 0; j < dst_height; ++j) { int yi; diff --git a/chromium/third_party/libyuv/source/scale_dspr2.cc b/chromium/third_party/libyuv/source/scale_dspr2.cc deleted file mode 100644 index ddedcbf46c2..00000000000 --- a/chromium/third_party/libyuv/source/scale_dspr2.cc +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC MIPS DSPR2 -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32) - -void ScaleRowDown2_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 - "beqz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - // TODO(fbarchard): Use odd pixels instead of even. 
- "precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1| - "precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9| - "precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17| - "precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t8, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t1, 8(%[dst]) \n" - "sw $t2, 12(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 16 \n" - - "2: \n" - "andi $t9, %[dst_width], 0xf \n" // residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lbu $t0, 1(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 2 \n" - "addiu $t9, $t9, -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst) - : [dst_width] "r"(dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - const uint8* t = src_ptr + src_stride; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 - "bltz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 0(%[t]) \n" // |19|18|17|16| - "lw $t5, 4(%[t]) \n" // |23|22|21|20| - "lw $t6, 8(%[t]) \n" // |27|26|25|24| - "lw $t7, 12(%[t]) \n" // |31|30|29|28| - "addiu $t9, $t9, -1 \n" - "srl $t8, $t0, 16 \n" // |X|X|3|2| - "ins $t0, $t4, 16, 16 \n" // |17|16|1|0| - "ins $t4, $t8, 0, 16 \n" // |19|18|3|2| - "raddu.w.qb $t0, $t0 \n" // |17+16+1+0| - "raddu.w.qb $t4, $t4 \n" // |19+18+3+2| - "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2 - "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2 - "srl $t8, $t1, 16 \n" // |X|X|7|6| - "ins $t1, $t5, 16, 16 \n" // |21|20|5|4| - "ins $t5, $t8, 0, 16 \n" // |22|23|7|6| - "raddu.w.qb $t1, $t1 \n" // |21+20+5+4| - "raddu.w.qb $t5, $t5 \n" // |23+22+7+6| - "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2 - "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2 - "srl $t8, $t2, 16 \n" // |X|X|11|10| - "ins $t2, $t6, 16, 16 \n" // |25|24|9|8| - "ins $t6, $t8, 0, 16 \n" // |27|26|11|10| - "raddu.w.qb $t2, $t2 \n" // |25+24+9+8| - "raddu.w.qb $t6, $t6 \n" // |27+26+11+10| - "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2 - "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2 - "srl $t8, $t3, 16 \n" // |X|X|15|14| - "ins $t3, $t7, 16, 16 \n" // |29|28|13|12| - "ins $t7, $t8, 0, 16 \n" // |31|30|15|14| - "raddu.w.qb $t3, $t3 \n" // |29+28+13+12| - "raddu.w.qb $t7, $t7 \n" // |31+30+15+14| - "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2 - "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2 - "addiu %[src_ptr], %[src_ptr], 16 \n" - "addiu %[t], %[t], 16 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "sb $t1, 2(%[dst]) \n" - "sb $t5, 3(%[dst]) \n" - "sb $t2, 4(%[dst]) \n" - "sb $t6, 5(%[dst]) \n" - "sb $t3, 6(%[dst]) \n" - "sb $t7, 7(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 0x7 \n" // x = residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lwr $t1, 0(%[src_ptr]) \n" - "lwl $t1, 3(%[src_ptr]) \n" - "lwr $t2, 0(%[t]) \n" - "lwl $t2, 3(%[t]) \n" - "srl $t8, $t1, 16 \n" - "ins $t1, $t2, 16, 16 \n" - "ins $t2, $t8, 0, 16 \n" - "raddu.w.qb $t1, $t1 \n" - "raddu.w.qb $t2, $t2 \n" - "shra_r.w $t1, $t1, 2 \n" - "shra_r.w $t2, $t2, 2 \n" - "sb $t1, 0(%[dst]) \n" - "sb $t2, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -2 \n" 
- "addiu %[t], %[t], 4 \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 2 \n" - - "3: \n" - ".set pop \n" - - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [t] "+r"(t) - : [dst_width] "r"(dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown4_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" - "beqz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0| - "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8| - "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16| - "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24| - "precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2| - "precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t5, 4(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 7 \n" // residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lbu $t1, 2(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -1 \n" - "sb $t1, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst) - : [dst_width] "r"(dst_width) - : "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - const uint8* s2 = s1 + stride; - const uint8* s3 = s2 + stride; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 1 \n" - "andi $t8, %[dst_width], 1 \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // |15|14|13|12| - "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 4(%[s1]) \n" // |23|22|21|20| - "lw $t6, 4(%[s2]) \n" // |27|26|25|24| - "lw $t7, 4(%[s3]) \n" // |31|30|29|28| - "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16| - "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20| - "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24| - "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "add $t4, $t4, $t5 \n" - "add $t6, $t6, $t7 \n" - "add $t4, $t4, $t6 \n" - "shra_r.w $t0, $t0, 4 \n" - "shra_r.w $t4, $t4, 4 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[s3], %[s3], 8 \n" - "addiu $t9, $t9, -1 \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 2 \n" - "beqz $t8, 2f \n" - " nop \n" - - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // |15|14|13|12| - 
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "shra_r.w $t0, $t0, 4 \n" - "sb $t0, 0(%[dst]) \n" - - "2: \n" - ".set pop \n" - - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [s1] "+r"(s1), [s2] "+r"(s2), - [s3] "+r"(s3) - : [dst_width] "r"(dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown34_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13| - "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30| - "addiu %[dst_width], %[dst_width], -24 \n" - "ins $t1, $t1, 8, 16 \n" // |3|1|0|X| - "ins $t4, $t0, 8, 16 \n" // |X|15|13|12| - "ins $t5, $t5, 8, 16 \n" // |19|17|16|X| - "ins $t8, $t9, 8, 16 \n" // |X|31|29|28| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5| - "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21| - "prepend $t1, $t2, 8 \n" // |4|3|1|0| - "prepend $t3, $t4, 24 \n" // |15|13|12|11| - "prepend $t5, $t6, 8 \n" // |20|19|17|16| - "prepend $t7, $t8, 24 \n" // |31|29|28|27| - "sw $t1, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t3, 8(%[dst]) \n" - "sw $t5, 12(%[dst]) \n" - "sw $t9, 16(%[dst]) \n" - "sw $t7, 20(%[dst]) \n" - "bnez %[dst_width], 1b \n" - " addiu %[dst], %[dst], 24 \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "repl.ph $t3, 3 \n" // 0x00030003 - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3| - "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t2, $t2, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t2, $t2, $t4 \n" - "addu.ph $t6, $t6, $t5 \n" - "sll $t5, $t0, 1 \n" - "add $t0, $t5, $t0 \n" - "shra_r.ph $t2, $t2, 2 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shll.ph $t4, $t2, 1 \n" - "addq.ph $t4, $t4, $t2 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.w $t0, $t0, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "shra_r.ph $t6, $t6, 2 \n" - "srl $t1, $t6, 16 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 3 \n" - "3: \n" - ".set pop \n" - : 
[src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d), - [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6"); -} - -void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "repl.ph $t2, 3 \n" // 0x00030003 - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3| - "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t4, $t4, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t4, $t4, $t3 \n" - "addu.ph $t6, $t6, $t5 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shra_r.ph $t4, $t4, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.ph $t6, $t6, 1 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "shra_r.w $t0, $t0, 1 \n" - "srl $t1, $t6, 16 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 3 \n" - "3: \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d), - [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6"); -} - -void ScaleRowDown38_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t6, $t6 \n" // |26|27|24|25| - "srl $t0, $t0, 8 \n" // |X|2|3|0| - "srl $t3, $t3, 16 \n" // |X|X|15|14| - "srl $t5, $t5, 16 \n" // |X|X|23|22| - "srl $t7, $t7, 16 \n" // |X|X|31|30| - "ins $t1, $t2, 24, 8 \n" // |8|6|5|4| - "ins $t6, $t5, 0, 8 \n" // |26|27|24|22| - "ins $t1, $t0, 0, 16 \n" // |8|6|3|0| - "ins $t6, $t7, 24, 8 \n" // |30|27|24|22| - "prepend $t2, $t3, 24 \n" // |X|15|14|11| - "ins $t4, $t4, 16, 8 \n" // |19|16|17|X| - "ins $t4, $t2, 0, 16 \n" // |19|16|14|11| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu %[dst_width], %[dst_width], -12 \n" - "addiu $t8,%[dst_width], -12 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t4, 4(%[dst]) \n" - "sw $t6, 8(%[dst]) \n" - "bgez $t8, 1b \n" - " addiu %[dst], %[dst], 12 \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"); -} - -void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width) { - intptr_t stride = src_stride; - const uint8* t = src_ptr + stride; - const int c = 0x2AAA; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[t]) \n" 
// |T3|T2|T1|T0| - "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6| - "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6 - "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4 - "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3| - "srl $t4, $t4, 2 \n" // t4 / 4 - "srl $t6, $t6, 16 \n" // |0|0|S3|T3| - "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3 - "addu $t6, $t5, $t6 \n" - "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA - "sll $t0, $t0, 8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0 - "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0 - "addu $t0, $t0, $t2 \n" - "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[t], %[t], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t4, -1(%[dst_ptr]) \n" - "sb $t6, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [t] "+r"(t), - [dst_width] "+r"(dst_width) - : [c] "r"(c) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6"); -} - -void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - stride += stride; - const uint8* s2 = src_ptr + stride; - const int c1 = 0x1C71; - const int c2 = 0x2AAA; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0| - "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4| - "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0| - "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6| - "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6 - "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4 - "sll $t8, $t5, 16 \n" // |R5|R4|0|0| - "raddu.w.qb $t8, $t8 \n" // R5+R4 - "addu $t7, $t7, $t8 \n" - "srl $t8, $t5, 16 \n" // |0|0|R7|R6| - "raddu.w.qb $t8, $t8 \n" // R7 + R6 - "addu $t6, $t6, $t8 \n" - "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA - "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1| - "srl $t8, $t8, 8 \n" // |0|S3|T3|R3| - "raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3 - "addu $t7, $t7, $t8 \n" - "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71 - "sll $t0, $t0, 8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "sll $t4, $t4, 8 \n" // |R2|R1|R0|0| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t2, $t2 \n" - "raddu.w.qb $t4, $t4 \n" - "addu $t0, $t0, $t2 \n" - "addu $t0, $t0, $t4 \n" - "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71 - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t7, $t7, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t6, -1(%[dst_ptr]) \n" - "sb $t7, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [s1] "+r"(s1), - [s2] "+r"(s2), [dst_width] "+r"(dst_width) - : [c1] "r"(c1), [c2] "r"(c2) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"); -} - -void ScaleAddRow_DSPR2(const 
uint8* src_ptr, uint16* dst_ptr, int src_width) { - int x; - for (x = 0; x < ((src_width - 1)); x += 8) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4; - uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t5], 0(%[src_ptr]) \n" - "lw %[tmp_t6], 4(%[src_ptr]) \n" - "lw %[tmp_t1], 0(%[dst_ptr]) \n" - "lw %[tmp_t2], 4(%[dst_ptr]) \n" - "lw %[tmp_t3], 8(%[dst_ptr]) \n" - "lw %[tmp_t4], 12(%[dst_ptr]) \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n" - "preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n" - "addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n" - "preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n" - "addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n" - "sw %[tmp_t1], 0(%[dst_ptr]) \n" - "sw %[tmp_t2], 4(%[dst_ptr]) \n" - "sw %[tmp_t3], 8(%[dst_ptr]) \n" - "sw %[tmp_t4], 12(%[dst_ptr]) \n" - ".set pop \n" - : - [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3), - [tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr) - : [dst_ptr] "r"(dst_ptr)); - src_ptr += 8; - dst_ptr += 8; - } - - if ((src_width)&7) { - for (x = 0; x < ((src_width - 1) & 7); x += 1) { - dst_ptr[0] += src_ptr[0]; - src_ptr += 1; - dst_ptr += 1; - } - } -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/chromium/third_party/libyuv/source/scale_gcc.cc b/chromium/third_party/libyuv/source/scale_gcc.cc index f0ac56fcb06..336eb2dba44 100644 --- a/chromium/third_party/libyuv/source/scale_gcc.cc +++ b/chromium/third_party/libyuv/source/scale_gcc.cc @@ -21,72 +21,73 @@ extern "C" { (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) // Offsets for source bytes 0 to 9 -static uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12. -static uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -static uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 0 to 10 -static uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; +static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. -static uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13}; +static const uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, + 8, 9, 9, 10, 10, 11, 12, 13}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. 
-static uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, - 10, 11, 12, 13, 13, 14, 14, 15}; +static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, + 10, 11, 12, 13, 13, 14, 14, 15}; // Coefficients for source bytes 0 to 10 -static uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; +static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; // Coefficients for source bytes 10 to 21 -static uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; +static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; // Coefficients for source bytes 21 to 31 -static uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; +static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; // Coefficients for source bytes 21 to 31 -static vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; +static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; -static uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; -static uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, - 6, 8, 11, 14, 128, 128, 128, 128}; +static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, + 6, 8, 11, 14, 128, 128, 128, 128}; // Arrange words 0,3,6 into 0,1,2 -static uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; // Arrange words 0,3,6 into 3,4,5 -static uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, - 6, 7, 12, 13, 128, 128, 128, 128}; +static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, + 6, 7, 12, 13, 128, 128, 128, 128}; // Scaling values for boxes of 3x3 and 2x3 -static uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, - 65536 / 9, 65536 / 6, 0, 0}; +static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, + 65536 / 9, 65536 / 6, 0, 0}; // Arrange first value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, - 11, 128, 14, 128, 128, 128, 128, 128}; +static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, + 11, 128, 14, 128, 128, 128, 128, 128}; // Arrange second value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, - 12, 128, 15, 128, 128, 128, 128, 128}; +static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, + 12, 128, 15, 128, 128, 128, 128, 128}; // Arrange third value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, - 13, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, + 13, 128, 128, 128, 128, 128, 128, 128}; // Scaling values for boxes of 3x2 and 2x2 -static uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, - 65536 / 3, 65536 / 2, 0, 0}; +static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, + 65536 / 3, 65536 / 2, 0, 0}; // GCC versions of row functions are verbatim conversions from Visual C. // Generated using gcc disassembly on Visual C object file: @@ -851,12 +852,12 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { // Constant for making pixels signed to avoid pmaddubsw // saturation. 
-static uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; +static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; // Constant for making pixels unsigned and adding .5 for rounding. -static uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, - 0x4040, 0x4040, 0x4040, 0x4040}; +static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, + 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. void ScaleFilterCols_SSSE3(uint8* dst_ptr, @@ -1250,13 +1251,13 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, } // Shuffle table for arranging 2 pixels into pairs for pmaddubsw -static uvec8 kShuffleColARGB = { +static const uvec8 kShuffleColARGB = { 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel }; // Shuffle table for duplicating 2 fractions into 8 bytes each -static uvec8 kShuffleFractions = { +static const uvec8 kShuffleFractions = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, }; diff --git a/chromium/third_party/libyuv/source/scale_neon.cc b/chromium/third_party/libyuv/source/scale_neon.cc index b03a828213d..46da9d5e272 100644 --- a/chromium/third_party/libyuv/source/scale_neon.cc +++ b/chromium/third_party/libyuv/source/scale_neon.cc @@ -264,13 +264,16 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, } #define HAS_SCALEROWDOWN38_NEON -static uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0}; -static uvec8 kShuf38_2 = {0, 8, 16, 2, 10, 17, 4, 12, - 18, 6, 14, 19, 0, 0, 0, 0}; -static vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, - 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12}; -static vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, - 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18}; +static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, + 22, 24, 27, 30, 0, 0, 0, 0}; +static const uvec8 kShuf38_2 = {0, 8, 16, 2, 10, 17, 4, 12, + 18, 6, 14, 19, 0, 0, 0, 0}; +static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, + 65536 / 12, 65536 / 12, 65536 / 12, + 65536 / 12, 65536 / 12}; +static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18}; // 32 -> 12 void ScaleRowDown38_NEON(const uint8* src_ptr, diff --git a/chromium/third_party/libyuv/source/scale_neon64.cc b/chromium/third_party/libyuv/source/scale_neon64.cc index 93fe67bf1e6..73aed9e1b38 100644 --- a/chromium/third_party/libyuv/source/scale_neon64.cc +++ b/chromium/third_party/libyuv/source/scale_neon64.cc @@ -261,13 +261,16 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"); } -static uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0}; -static uvec8 kShuf38_2 = {0, 16, 32, 2, 18, 33, 4, 20, - 34, 6, 22, 35, 0, 0, 0, 0}; -static vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, - 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12}; -static vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, - 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18}; +static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, + 22, 24, 27, 30, 0, 0, 0, 0}; +static const uvec8 kShuf38_2 = {0, 16, 32, 2, 18, 33, 4, 20, + 34, 6, 22, 35, 0, 0, 0, 0}; +static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, + 65536 / 
12, 65536 / 12, 65536 / 12, + 65536 / 12, 65536 / 12}; +static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18}; // 32 -> 12 void ScaleRowDown38_NEON(const uint8* src_ptr, diff --git a/chromium/third_party/libyuv/source/scale_win.cc b/chromium/third_party/libyuv/source/scale_win.cc index b5fd6638262..b33881998aa 100644 --- a/chromium/third_party/libyuv/source/scale_win.cc +++ b/chromium/third_party/libyuv/source/scale_win.cc @@ -20,72 +20,73 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) // Offsets for source bytes 0 to 9 -static uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12. -static uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -static uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 0 to 10 -static uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; +static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. -static uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13}; +static const uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, + 8, 9, 9, 10, 10, 11, 12, 13}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. 
-static uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, - 10, 11, 12, 13, 13, 14, 14, 15}; +static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, + 10, 11, 12, 13, 13, 14, 14, 15}; // Coefficients for source bytes 0 to 10 -static uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; +static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; // Coefficients for source bytes 10 to 21 -static uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; +static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; // Coefficients for source bytes 21 to 31 -static uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; +static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; // Coefficients for source bytes 21 to 31 -static vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; +static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; -static uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; -static uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, - 6, 8, 11, 14, 128, 128, 128, 128}; +static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, + 6, 8, 11, 14, 128, 128, 128, 128}; // Arrange words 0,3,6 into 0,1,2 -static uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; // Arrange words 0,3,6 into 3,4,5 -static uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, - 6, 7, 12, 13, 128, 128, 128, 128}; +static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, + 6, 7, 12, 13, 128, 128, 128, 128}; // Scaling values for boxes of 3x3 and 2x3 -static uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, - 65536 / 9, 65536 / 6, 0, 0}; +static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, + 65536 / 9, 65536 / 6, 0, 0}; // Arrange first value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, - 11, 128, 14, 128, 128, 128, 128, 128}; +static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, + 11, 128, 14, 128, 128, 128, 128, 128}; // Arrange second value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, - 12, 128, 15, 128, 128, 128, 128, 128}; +static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, + 12, 128, 15, 128, 128, 128, 128, 128}; // Arrange third value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, - 13, 128, 128, 128, 128, 128, 128, 128}; +static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, + 13, 128, 128, 128, 128, 128, 128, 128}; // Scaling values for boxes of 3x2 and 2x2 -static uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, - 65536 / 3, 65536 / 2, 0, 0}; +static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, + 65536 / 3, 65536 / 2, 0, 0}; // Reads 32 pixels, throws half away and writes 16 pixels. __declspec(naked) void ScaleRowDown2_SSSE3(const uint8* src_ptr, @@ -870,12 +871,12 @@ __declspec(naked) void ScaleAddRow_AVX2(const uint8* src_ptr, // Constant for making pixels signed to avoid pmaddubsw // saturation. 
-static uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; +static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; // Constant for making pixels unsigned and adding .5 for rounding. -static uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, - 0x4040, 0x4040, 0x4040, 0x4040}; +static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, + 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. __declspec(naked) void ScaleFilterCols_SSSE3(uint8* dst_ptr, @@ -1246,13 +1247,13 @@ __declspec(naked) void ScaleARGBCols_SSE2(uint8* dst_argb, // TODO(fbarchard): Port to Neon // Shuffle table for arranging 2 pixels into pairs for pmaddubsw -static uvec8 kShuffleColARGB = { +static const uvec8 kShuffleColARGB = { 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel }; // Shuffle table for duplicating 2 fractions into 8 bytes each -static uvec8 kShuffleFractions = { +static const uvec8 kShuffleFractions = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, }; diff --git a/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py b/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py index efea81e07b1..09ddc40e730 100755 --- a/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py +++ b/chromium/third_party/libyuv/tools_libyuv/autoroller/roll_deps.py @@ -207,7 +207,13 @@ def BuildDepsentryDict(deps_dict): """Builds a dict of paths to DepsEntry objects from a raw parsed deps dict.""" result = {} def AddDepsEntries(deps_subdict): - for path, deps_url in deps_subdict.iteritems(): + for path, deps_url_spec in deps_subdict.iteritems(): + # The deps url is either an URL and a condition, or just the URL. + if isinstance(deps_url_spec, dict): + deps_url = deps_url_spec['url'] + else: + deps_url = deps_url_spec + if not result.has_key(path): url, revision = deps_url.split('@') if deps_url else (None, None) result[path] = DepsEntry(path, url, revision) diff --git a/chromium/third_party/libyuv/unit_test/compare_test.cc b/chromium/third_party/libyuv/unit_test/compare_test.cc index ff39b2b0f60..1c6d988ef2c 100644 --- a/chromium/third_party/libyuv/unit_test/compare_test.cc +++ b/chromium/third_party/libyuv/unit_test/compare_test.cc @@ -338,7 +338,7 @@ static const int kMaxOptCount = (1 << (32 - 3)) - 64; // 536870848 TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { uint32 h1 = 0; - const int kMaxWidth = benchmark_width_ * benchmark_height_; + const int kMaxWidth = (benchmark_width_ * benchmark_height_ + 31) & ~31; align_buffer_page_end(src_a, kMaxWidth); align_buffer_page_end(src_b, kMaxWidth); memset(src_a, 255u, kMaxWidth); diff --git a/chromium/third_party/libyuv/unit_test/convert_test.cc b/chromium/third_party/libyuv/unit_test/convert_test.cc index 56b6364e5eb..7d196a1d8e0 100644 --- a/chromium/third_party/libyuv/unit_test/convert_test.cc +++ b/chromium/third_party/libyuv/unit_test/convert_test.cc @@ -8,9 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include #include #include +#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ + #include "libyuv/basic_types.h" #include "libyuv/compare.h" #include "libyuv/convert.h" @@ -26,102 +29,86 @@ #include "libyuv/rotate.h" #include "libyuv/video_common.h" +#if defined(__arm__) || defined(__aarch64__) +// arm version subsamples by summing 4 pixels then multiplying by matrix with +// 4x smaller coefficients which are rounded to nearest integer. +#define ARM_YUV_ERROR 4 +#else +#define ARM_YUV_ERROR 0 +#endif + namespace libyuv { +// Alias to copy pixels as is +#define AR30ToAR30 ARGBCopy + #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) -#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ +// Planar test + +#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + assert(SRC_BPC == 1 || SRC_BPC == 2); \ + assert(DST_BPC == 1 || DST_BPC == 2); \ + assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2); \ + assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2); \ + assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2); \ + assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2); \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kHeight = benchmark_height_; \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ - OFF); \ - align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ - OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (fastrand() & 0xff); \ - src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (fastrand() & 0xff); \ - } \ - } \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_u_c, 2, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_c, 3, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_u_opt, 102, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_opt, 103, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + 
align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + reinterpret_cast(src_y + OFF), kWidth, \ + reinterpret_cast(src_u + OFF), kSrcHalfWidth, \ + reinterpret_cast(src_v + OFF), kSrcHalfWidth, \ + reinterpret_cast(dst_y_c), kWidth, \ + reinterpret_cast(dst_u_c), kDstHalfWidth, \ + reinterpret_cast(dst_v_c), kDstHalfWidth, kWidth, \ + NEG kHeight); \ MaskCpuFlags(benchmark_cpu_info_); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + reinterpret_cast(src_y + OFF), kWidth, \ + reinterpret_cast(src_u + OFF), kSrcHalfWidth, \ + reinterpret_cast(src_v + OFF), kSrcHalfWidth, \ + reinterpret_cast(dst_y_opt), kWidth, \ + reinterpret_cast(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ } \ - EXPECT_EQ(0, max_diff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = abs( \ - static_cast(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast( \ - dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ } \ - EXPECT_LE(max_diff, 3); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = abs( \ - static_cast(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast( \ - dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + for (int i = 
0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ } \ - EXPECT_LE(max_diff, 3); \ free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_u_c); \ free_aligned_buffer_page_end(dst_v_c); \ @@ -133,25 +120,36 @@ namespace libyuv { free_aligned_buffer_page_end(src_v); \ } -#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) - -TESTPLANARTOP(I420, 2, 2, I420, 2, 2) -TESTPLANARTOP(I422, 2, 1, I420, 2, 2) -TESTPLANARTOP(I444, 1, 1, I420, 2, 2) -TESTPLANARTOP(I420, 2, 2, I422, 2, 1) -TESTPLANARTOP(I420, 2, 2, I444, 1, 1) -TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) -TESTPLANARTOP(I422, 2, 1, I422, 2, 1) -TESTPLANARTOP(I444, 1, 1, I444, 1, 1) +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTPLANARTOP(I420, uint8, 1, 2, 2, I420, uint8, 1, 2, 2) +TESTPLANARTOP(I422, uint8, 1, 2, 1, I420, uint8, 1, 2, 2) +TESTPLANARTOP(I444, uint8, 1, 1, 1, I420, uint8, 1, 2, 2) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I422, uint8, 1, 2, 1) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I444, uint8, 1, 1, 1) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I420Mirror, uint8, 1, 2, 2) +TESTPLANARTOP(I422, uint8, 1, 2, 1, I422, uint8, 1, 2, 1) +TESTPLANARTOP(I444, uint8, 1, 1, 1, I444, uint8, 1, 1, 1) +TESTPLANARTOP(I010, uint16, 2, 2, 2, I010, uint16, 2, 2, 2) +TESTPLANARTOP(I010, uint16, 2, 2, 2, I420, uint8, 1, 2, 2) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I010, uint16, 2, 2, 2) +TESTPLANARTOP(H010, uint16, 2, 2, 2, H010, uint16, 2, 2, 2) +TESTPLANARTOP(H010, uint16, 2, 2, 2, H420, uint8, 1, 2, 2) +TESTPLANARTOP(H420, uint8, 1, 2, 2, H010, uint16, 2, 2, 2) // Test Android 420 to I420 #define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ @@ -595,6 +593,8 @@ TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4) TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4) TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1, 0, ARGB, 4) +// TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1, 0, ABGR, 4) #define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, 
SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \ @@ -737,6 +737,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2) TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) #ifdef DO_THREE_PLANES @@ -865,15 +867,8 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) benchmark_width_, DIFF, _Opt, +, 0) TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) -#if defined(__arm__) || defined(__aarch64__) -// arm version subsamples by summing 4 pixels then multiplying by matrix with -// 4x smaller coefficients which are rounded to nearest integer. -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4) -TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 4) -#else -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) -TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 0) -#endif +TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR) TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) @@ -1069,6 +1064,7 @@ TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) @@ -1076,14 +1072,16 @@ TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0) TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0) TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 4) -TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 4) +TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) +TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0) TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) @@ -1728,6 +1726,8 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) +// Transitive tests. A to B to C is same as A to C. + #define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ W1280, N, NEG, OFF, FMT_C, BPP_C) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ @@ -1893,6 +1893,59 @@ TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \ + OFF, FMT_C, BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_##FMT_C##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ + MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + } \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \ + kWidth, NEG kHeight); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, kStrideC, \ + kWidth, kHeight); \ + for (int i = 0; i < kStrideC * kHeight; i += 4) { \ + EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \ + EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \ + } \ + free_aligned_buffer_page_end(src_argb_a); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ + } + +#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Unaligned, +, 1, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Opt, +, 0, FMT_C, BPP_C) + +// Caveat: Destination needs to be 4 bytes +TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4) + +// TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ABGR, 4) + TEST_F(LibYUVConvertTest, RotateWithARGBSource) { // 2x2 frames uint32_t src[4]; @@ -1928,4 +1981,182 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) { EXPECT_EQ(dst[3], src[1]); } +#ifdef HAS_ARGBTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { + // ARGBToAR30Row_AVX2 expects a multiple of 8 pixels. 
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
+ align_buffer_page_end(src, kPixels * 4);
+ align_buffer_page_end(dst_opt, kPixels * 4);
+ align_buffer_page_end(dst_c, kPixels * 4);
+ MemRandomize(src, kPixels * 4);
+ memset(dst_opt, 0, kPixels * 4);
+ memset(dst_c, 1, kPixels * 4);
+
+ ARGBToAR30Row_C(src, dst_c, kPixels);
+
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ if (has_avx2) {
+ ARGBToAR30Row_AVX2(src, dst_opt, kPixels);
+ } else if (has_ssse3) {
+ ARGBToAR30Row_SSSE3(src, dst_opt, kPixels);
+ } else {
+ ARGBToAR30Row_C(src, dst_opt, kPixels);
+ }
+ }
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_opt[i], dst_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src);
+ free_aligned_buffer_page_end(dst_opt);
+ free_aligned_buffer_page_end(dst_c);
+}
+#endif // HAS_ARGBTOAR30ROW_AVX2
+
+#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+ ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF, \
+ FMT_C, BPP_C) \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
+ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
+ const int kBpc = 2; \
+ align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
+ align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
+ align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
+ align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
+ align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ reinterpret_cast<uint16*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \
+ } \
+ for (int i = 0; i < kSizeUV; ++i) { \
+ reinterpret_cast<uint16*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \
+ reinterpret_cast<uint16*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \
+ } \
+ memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
+ memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16*>(src_u + SOFF), kStrideUV, \
+ reinterpret_cast<uint16*>(src_v + SOFF), kStrideUV, \
+ dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_PLANAR##To##FMT_B( \
+ reinterpret_cast<uint16*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16*>(src_u + SOFF), kStrideUV, \
+ reinterpret_cast<uint16*>(src_v + SOFF), kStrideUV, \
+ dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
+ } \
+ int max_diff = 0; \
+ for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
+ int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) - \
+ static_cast<int>(dst_argb_opt[i + DOFF])); \
+ if (abs_diff > max_diff) { \
+ max_diff = abs_diff; \
+ } \
+ } \
+ EXPECT_LE(max_diff, DIFF); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_u); \
+ free_aligned_buffer_page_end(src_v); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ }
+
+#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, DIFF, FMT_C, BPP_C) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0, FMT_C, \
+ BPP_C) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 1, FMT_C, \
+ BPP_C) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0, FMT_C, \
+ BPP_C) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0, FMT_C, \
+ BPP_C)
+
+TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2, AR30, 4)
+TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4)
+TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4)
+
+static int Clamp(int y) {
+ if (y < 0) {
+ y = 0;
+ }
+ if (y > 255) {
+ y = 255;
+ }
+ return y;
+}
+
+TEST_F(LibYUVConvertTest, TestH420ToARGB) {
+ const int kSize = 256;
+ align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2);
+ align_buffer_page_end(argb_pixels, kSize * 4);
+ uint8* orig_y = orig_yuv;
+ uint8* orig_u = orig_y + kSize;
+ uint8* orig_v = orig_u + kSize / 2;
+
+ // Test grey scale
+ for (int i = 0; i < kSize; ++i) {
+ orig_y[i] = i;
+ }
+ for (int i = 0; i < kSize / 2; ++i) {
+ orig_u[i] = 128; // 128 is 0.
+ orig_v[i] = 128;
+ }
+
+ H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
+
+ for (int i = 0; i < kSize; ++i) {
+ int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f));
+ EXPECT_NEAR(argb_pixels[i * 4 + 0], expected_y, 1);
+ EXPECT_NEAR(argb_pixels[i * 4 + 1], expected_y, 1);
+ EXPECT_NEAR(argb_pixels[i * 4 + 2], expected_y, 1);
+ EXPECT_EQ(argb_pixels[i * 4 + 3], 255);
+ }
+ free_aligned_buffer_page_end(orig_yuv);
+ free_aligned_buffer_page_end(argb_pixels);
+}
+
+TEST_F(LibYUVConvertTest, TestH010ToARGB) {
+ const int kSize = 1024;
+ align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
+ align_buffer_page_end(argb_pixels, kSize * 4);
+ uint16* orig_y = reinterpret_cast<uint16*>(orig_yuv);
+ uint16* orig_u = orig_y + kSize;
+ uint16* orig_v = orig_u + kSize / 2;
+
+ // Test grey scale
+ for (int i = 0; i < kSize; ++i) {
+ orig_y[i] = i;
+ }
+ for (int i = 0; i < kSize / 2; ++i) {
+ orig_u[i] = 512; // 512 is 0.
+ orig_v[i] = 512;
+ }
+
+ H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
+
+ for (int i = 0; i < kSize; ++i) {
+ int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4));
+ EXPECT_NEAR(argb_pixels[i * 4 + 0], expected_y, 1);
+ EXPECT_NEAR(argb_pixels[i * 4 + 1], expected_y, 1);
+ EXPECT_NEAR(argb_pixels[i * 4 + 2], expected_y, 1);
+ EXPECT_EQ(argb_pixels[i * 4 + 3], 255);
+ }
+ free_aligned_buffer_page_end(orig_yuv);
+ free_aligned_buffer_page_end(argb_pixels);
+}
+
 } // namespace libyuv
diff --git a/chromium/third_party/libyuv/unit_test/cpu_test.cc b/chromium/third_party/libyuv/unit_test/cpu_test.cc
index 4e694f55ce5..a8fb4b4ac01 100644
--- a/chromium/third_party/libyuv/unit_test/cpu_test.cc
+++ b/chromium/third_party/libyuv/unit_test/cpu_test.cc
@@ -65,8 +65,6 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
 #if defined(__mips__)
 int has_mips = TestCpuFlag(kCpuHasMIPS);
 printf("Has MIPS %x\n", has_mips);
- int has_dspr2 = TestCpuFlag(kCpuHasDSPR2);
- printf("Has DSPR2 %x\n", has_dspr2);
 int has_msa = TestCpuFlag(kCpuHasMSA);
 printf("Has MSA %x\n", has_msa);
 #endif
@@ -147,6 +145,8 @@ static int FileExists(const char* file_name) {
 TEST_F(LibYUVBaseTest, TestLinuxNeon) {
 if (FileExists("../../unit_test/testdata/arm_v7.txt")) {
+ printf("Note: testing to load \"../../unit_test/testdata/arm_v7.txt\"\n");
+
 EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt"));
 EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt"));
 EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/juno.txt"));
diff --git a/chromium/third_party/libyuv/unit_test/planar_test.cc b/chromium/third_party/libyuv/unit_test/planar_test.cc
index f9e6f8abb2f..a499688feed 100644
--- a/chromium/third_party/libyuv/unit_test/planar_test.cc
+++ b/chromium/third_party/libyuv/unit_test/planar_test.cc
@@ -2661,7 +2661,7 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
 }
 #endif
-// TODO(fbarchard): improve test for platforms and cpu detect
+// TODO(fbarchard): Improve test for more platforms.
 #ifdef HAS_MULTIPLYROW_16_AVX2
 TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
 const int kPixels = benchmark_width_ * benchmark_height_;
@@ -2697,7 +2697,159 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
 free_aligned_buffer_page_end(dst_pixels_y_opt);
 free_aligned_buffer_page_end(dst_pixels_y_c);
 }
-#endif
+#endif // HAS_MULTIPLYROW_16_AVX2
+
+TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ Convert16To8Plane(reinterpret_cast<uint16*>(src_pixels_y),
+ benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ Convert16To8Plane(reinterpret_cast<uint16*>(src_pixels_y),
+ benchmark_width_, dst_pixels_y_opt, benchmark_width_,
+ 16384, benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+
+// TODO(fbarchard): Improve test for more platforms.
+#ifdef HAS_CONVERT16TO8ROW_AVX2
+TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
+ // AVX2 does multiple of 32, so round count up
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ // clamp source range to 10 bits.
+ for (int i = 0; i < kPixels; ++i) {
+ reinterpret_cast<uint16*>(src_pixels_y)[i] &= 1023;
+ }
+
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ Convert16To8Row_C(reinterpret_cast<uint16*>(src_pixels_y),
+ dst_pixels_y_c, 16384, kPixels);
+
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ if (has_avx2) {
+ Convert16To8Row_AVX2(reinterpret_cast<uint16*>(src_pixels_y),
+ dst_pixels_y_opt, 16384, kPixels);
+ } else if (has_ssse3) {
+ Convert16To8Row_SSSE3(reinterpret_cast<uint16*>(src_pixels_y),
+ dst_pixels_y_opt, 16384, kPixels);
+ } else {
+ Convert16To8Row_C(reinterpret_cast<uint16*>(src_pixels_y),
+ dst_pixels_y_opt, 16384, kPixels);
+ }
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+#endif // HAS_CONVERT16TO8ROW_AVX2
+
+TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_y, kPixels);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
+
+ MemRandomize(src_pixels_y, kPixels);
+ memset(dst_pixels_y_opt, 0, kPixels * 2);
+ memset(dst_pixels_y_c, 1, kPixels * 2);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ Convert8To16Plane(src_pixels_y, benchmark_width_,
+ reinterpret_cast<uint16*>(dst_pixels_y_c), benchmark_width_,
+ 1024, benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ Convert8To16Plane(src_pixels_y, benchmark_width_,
+ reinterpret_cast<uint16*>(dst_pixels_y_opt),
+ benchmark_width_, 1024, benchmark_width_,
+ benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels * 2; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+
+// TODO(fbarchard): Improve test for more platforms.
+#ifdef HAS_CONVERT8TO16ROW_AVX2
+TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
+ align_buffer_page_end(src_pixels_y, kPixels);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
+
+ MemRandomize(src_pixels_y, kPixels);
+ memset(dst_pixels_y_opt, 0, kPixels * 2);
+ memset(dst_pixels_y_c, 1, kPixels * 2);
+
+ Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16*>(dst_pixels_y_c),
+ 1024, kPixels);
+
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ int has_sse2 = TestCpuFlag(kCpuHasSSE2);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ if (has_avx2) {
+ Convert8To16Row_AVX2(src_pixels_y,
+ reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024,
+ kPixels);
+ } else if (has_sse2) {
+ Convert8To16Row_SSE2(src_pixels_y,
+ reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024,
+ kPixels);
+ } else {
+ Convert8To16Row_C(src_pixels_y,
+ reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024,
+ kPixels);
+ }
+ }
+
+ for (int i = 0; i < kPixels * 2; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+#endif // HAS_CONVERT8TO16ROW_AVX2
 
 float TestScaleMaxSamples(int benchmark_width,
 int benchmark_height,
diff --git a/chromium/third_party/libyuv/unit_test/unit_test.cc b/chromium/third_party/libyuv/unit_test/unit_test.cc
index 1ad4dece861..c2d7a1db7b5 100644
--- a/chromium/third_party/libyuv/unit_test/unit_test.cc
+++ b/chromium/third_party/libyuv/unit_test/unit_test.cc
@@ -19,10 +19,6 @@
 #endif
 #include "libyuv/cpu_id.h"
-// Change this to 1000 for benchmarking.
-// TODO(fbarchard): Add command line parsing to pass this as option.
-#define BENCHMARK_ITERATIONS 1
-
 unsigned int fastrand_seed = 0xfb;
 #ifdef LIBYUV_USE_GFLAGS
@@ -47,7 +43,7 @@ static const int32 FLAGS_libyuv_cpu_info = 0;
 // Set flags to -1 for benchmarking to avoid slower C code.
LibYUVConvertTest::LibYUVConvertTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -92,12 +88,6 @@ LibYUVConvertTest::LibYUVConvertTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast((static_cast(Abs(benchmark_width_)) * - static_cast(Abs(benchmark_height_)) * - static_cast(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast((static_cast(Abs(benchmark_width_)) * static_cast(Abs(benchmark_height_)) * @@ -107,7 +97,7 @@ LibYUVConvertTest::LibYUVConvertTest() } LibYUVColorTest::LibYUVColorTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -152,12 +142,6 @@ LibYUVColorTest::LibYUVColorTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast((static_cast(Abs(benchmark_width_)) * - static_cast(Abs(benchmark_height_)) * - static_cast(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast((static_cast(Abs(benchmark_width_)) * static_cast(Abs(benchmark_height_)) * @@ -167,7 +151,7 @@ LibYUVColorTest::LibYUVColorTest() } LibYUVScaleTest::LibYUVScaleTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -212,12 +196,6 @@ LibYUVScaleTest::LibYUVScaleTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast((static_cast(Abs(benchmark_width_)) * - static_cast(Abs(benchmark_height_)) * - static_cast(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast((static_cast(Abs(benchmark_width_)) * static_cast(Abs(benchmark_height_)) * @@ -227,7 +205,7 @@ LibYUVScaleTest::LibYUVScaleTest() } LibYUVRotateTest::LibYUVRotateTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -272,12 +250,6 @@ LibYUVRotateTest::LibYUVRotateTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast((static_cast(Abs(benchmark_width_)) * - static_cast(Abs(benchmark_height_)) * - static_cast(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast((static_cast(Abs(benchmark_width_)) * static_cast(Abs(benchmark_height_)) * @@ -287,7 +259,7 @@ LibYUVRotateTest::LibYUVRotateTest() } LibYUVPlanarTest::LibYUVPlanarTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -332,12 +304,6 @@ LibYUVPlanarTest::LibYUVPlanarTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast((static_cast(Abs(benchmark_width_)) * - static_cast(Abs(benchmark_height_)) * - static_cast(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast((static_cast(Abs(benchmark_width_)) * static_cast(Abs(benchmark_height_)) * @@ -347,7 +313,7 @@ LibYUVPlanarTest::LibYUVPlanarTest() } LibYUVBaseTest::LibYUVBaseTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), 
benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -392,12 +358,6 @@ LibYUVBaseTest::LibYUVBaseTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast((static_cast(Abs(benchmark_width_)) * - static_cast(Abs(benchmark_height_)) * - static_cast(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast((static_cast(Abs(benchmark_width_)) * static_cast(Abs(benchmark_height_)) * @@ -407,7 +367,7 @@ LibYUVBaseTest::LibYUVBaseTest() } LibYUVCompareTest::LibYUVCompareTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -452,12 +412,6 @@ LibYUVCompareTest::LibYUVCompareTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast((static_cast(Abs(benchmark_width_)) * - static_cast(Abs(benchmark_height_)) * - static_cast(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast((static_cast(Abs(benchmark_width_)) * static_cast(Abs(benchmark_height_)) * diff --git a/chromium/third_party/libyuv/unit_test/video_common_test.cc b/chromium/third_party/libyuv/unit_test/video_common_test.cc index f16b6772f95..ba7b15a9d28 100644 --- a/chromium/third_party/libyuv/unit_test/video_common_test.cc +++ b/chromium/third_party/libyuv/unit_test/video_common_test.cc @@ -73,12 +73,15 @@ TEST_F(LibYUVBaseTest, TestFourCC) { EXPECT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB)); EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA)); EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30)); EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG)); EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010)); EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16)); diff --git a/chromium/third_party/libyuv/util/Makefile b/chromium/third_party/libyuv/util/Makefile index 6044d2adf63..40e74b6509c 100644 --- a/chromium/third_party/libyuv/util/Makefile +++ b/chromium/third_party/libyuv/util/Makefile @@ -4,3 +4,6 @@ ifeq ($(CXX),icl) else $(CXX) -msse2 -O3 -fopenmp -static -o psnr psnr.cc ssim.cc psnr_main.cc -Wl,--strip-all endif + +# for MacOS +# /usr/local/bin/g++-7 -msse2 -O3 -fopenmp -Bstatic -o psnr psnr.cc ssim.cc psnr_main.cc diff --git a/chromium/third_party/libyuv/util/cpuid.c b/chromium/third_party/libyuv/util/cpuid.c index 9ff618e0d28..59c65d60e0f 100644 --- a/chromium/third_party/libyuv/util/cpuid.c +++ b/chromium/third_party/libyuv/util/cpuid.c @@ -69,8 +69,8 @@ int main(int argc, const char* argv[]) { printf("Has NEON %x\n", has_neon); } if (has_mips) { - int has_dspr2 = TestCpuFlag(kCpuHasDSPR2); - printf("Has DSPR2 %x\n", has_dspr2); + int has_msa = TestCpuFlag(kCpuHasMSA); + printf("Has MSA %x\n", has_msa); } if (has_x86) { int has_sse2 = TestCpuFlag(kCpuHasSSE2); -- cgit v1.2.1