author     Allan Sandfeld Jensen <allan.jensen@qt.io>  2018-05-03 13:42:47 +0200
committer  Allan Sandfeld Jensen <allan.jensen@qt.io>  2018-05-15 10:27:51 +0000
commit     8c5c43c7b138c9b4b0bf56d946e61d3bbc111bec (patch)
tree       d29d987c4d7b173cf853279b79a51598f104b403 /chromium/third_party/libyuv
parent     830c9e163d31a9180fadca926b3e1d7dfffb5021 (diff)
BASELINE: Update Chromium to 66.0.3359.156
Change-Id: I0c9831ad39911a086b6377b16f995ad75a51e441
Reviewed-by: Michal Klocek <michal.klocek@qt.io>
Diffstat (limited to 'chromium/third_party/libyuv')
88 files changed, 14514 insertions, 13134 deletions
diff --git a/chromium/third_party/libyuv/BUILD.gn b/chromium/third_party/libyuv/BUILD.gn
index 10b5b819a9c..03ce499e6d9 100644
--- a/chromium/third_party/libyuv/BUILD.gn
+++ b/chromium/third_party/libyuv/BUILD.gn
@@ -48,6 +48,7 @@ group("default") {
 
 group("libyuv") {
   all_dependent_configs = [ ":libyuv_config" ]
+  deps = []
 
   if (is_win && target_cpu == "x64") {
     # Compile with clang in order to get inline assembly
@@ -60,13 +61,19 @@ group("libyuv") {
     ]
   }
 
+  if (libyuv_use_neon) {
+    deps += [ ":libyuv_neon" ]
+  }
+
+  if (libyuv_use_msa) {
+    deps += [ ":libyuv_msa" ]
+  }
+
   if (!is_ios) {
     # Make sure that clients of libyuv link with libjpeg. This can't go in
     # libyuv_internal because in Windows x64 builds that will generate a clang
     # build of libjpeg, and we don't want two copies.
-    deps = [
-      "//third_party:jpeg",
-    ]
+    deps += [ "//third_party:jpeg" ]
   }
 }
 
@@ -147,14 +154,6 @@ static_library("libyuv_internal") {
     deps += [ "//third_party:jpeg_includes" ]
   }
 
-  if (libyuv_use_neon) {
-    deps += [ ":libyuv_neon" ]
-  }
-
-  if (libyuv_use_msa) {
-    deps += [ ":libyuv_msa" ]
-  }
-
   # Always enable optimization for Release and NaCl builds (to workaround
   # crbug.com/538243).
   if (!is_debug || is_nacl) {
@@ -174,6 +173,7 @@ static_library("libyuv_internal") {
     ]
   }
 }
+
 if (libyuv_use_neon) {
   static_library("libyuv_neon") {
     sources = [
@@ -188,6 +188,10 @@ if (libyuv_use_neon) {
       "source/scale_neon64.cc",
     ]
 
+    deps = [
+      ":libyuv_internal",
+    ]
+
     public_configs = [ ":libyuv_config" ]
 
     # Always enable optimization for Release and NaCl builds (to workaround
@@ -217,6 +221,10 @@ if (libyuv_use_msa) {
      "source/scale_msa.cc",
     ]
 
+    deps = [
+      ":libyuv_internal",
+    ]
+
     public_configs = [ ":libyuv_config" ]
   }
 }
diff --git a/chromium/third_party/libyuv/DEPS b/chromium/third_party/libyuv/DEPS
index 688ff1299c2..ccc701c4f69 100644
--- a/chromium/third_party/libyuv/DEPS
+++ b/chromium/third_party/libyuv/DEPS
@@ -56,6 +56,10 @@ deps = {
     'url': Var('chromium_git') + '/chromium/src/base' + '@' + '9b543d487c7c38be191c6180001ff9ce186ae326',
     'condition': 'checkout_android',
   },
+  'src/third_party/android_ndk': {
+    'url': Var('chromium_git') + '/android_ndk.git' + '@' + 'e951c37287c7d8cd915bf8d4149fd4a06d808b55',
+    'condition': 'checkout_android',
+  },
   'src/third_party/android_tools': {
     'url': Var('chromium_git') + '/android_tools.git' + '@' + 'aadb2fed04af8606545b0afe4e3060bc1a15fad7',
     'condition': 'checkout_android',
diff --git a/chromium/third_party/libyuv/README.chromium b/chromium/third_party/libyuv/README.chromium
index 373e5f75c87..f38414b3a80 100644
--- a/chromium/third_party/libyuv/README.chromium
+++ b/chromium/third_party/libyuv/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1688
+Version: 1698
 License: BSD
 License File: LICENSE
 
diff --git a/chromium/third_party/libyuv/docs/deprecated_builds.md b/chromium/third_party/libyuv/docs/deprecated_builds.md
index d54a0282c15..29e0bf9bc30 100644
--- a/chromium/third_party/libyuv/docs/deprecated_builds.md
+++ b/chromium/third_party/libyuv/docs/deprecated_builds.md
@@ -165,11 +165,11 @@ mipsel
 
 arm32 disassembly:
 
-    third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
+    third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
 
 arm64 disassembly:
 
-    third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
+    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
 
 Running tests:
 
diff --git a/chromium/third_party/libyuv/docs/formats.md b/chromium/third_party/libyuv/docs/formats.md
index 3973e5d5ad8..f78f57bb4c4 100644
--- a/chromium/third_party/libyuv/docs/formats.md
+++ b/chromium/third_party/libyuv/docs/formats.md
@@ -50,11 +50,12 @@ The following is extracted from video_common.h as a complete list of formats sup
   // 1 Secondary YUV format: row biplanar.
   FOURCC_M420 = FOURCC('M', '4', '2', '0'),
 
-  // 10 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
+  // 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
   FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
   FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
   FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
   FOURCC_AR30 = FOURCC('A', 'R', '3', '0'),  // 10 bit per channel. 2101010.
+  FOURCC_AB30 = FOURCC('A', 'B', '3', '0'),  // ABGR version of 10 bit
   FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
   FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
   FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
@@ -139,7 +140,7 @@ There are 2 RGB layouts - RGB24 (aka 24BG) and RAW
 RGB24 is B,G,R in memory
 RAW is R,G,B in memory
 
-# AR30
+# AR30 and XR30
 
 AR30 is 2 10 10 10 ARGB stored in little endian order.
 The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
@@ -148,3 +149,14 @@ The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
 2 - 66%. 10101010b = 0xaa = 170
 3 - 100%. 11111111b = 0xff = 255
 The 10 bit RGB values range from 0 to 1023.
+XR30 is the same as AR30 but with no alpha channel.
+
+# NV12 and NV21
+
+NV12 is a biplanar format with a full sized Y plane followed by a single
+chroma plane with weaved U and V values.
+NV21 is the same but with weaved V and U values.
+The 12 in NV12 refers to 12 bits per pixel. NV12 has a half width and half
+height chroma channel, and therefore is a 420 subsampling.
+NV16 is 16 bits per pixel, with half width and full height. aka 422.
+NV24 is 24 bits per pixel with full sized chroma channel. aka 444.
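A note on the formats.md additions above: they document the two layouts that this
commit's new conversions target (AR30/AB30 and NV12/NV21). The following minimal
C sketch illustrates both layouts. It is not part of the patch and not libyuv API;
the helper names UnpackAR30 and NV12BufferSize are hypothetical, and the bit order
assumes the "2101010" comment means blue in the low 10 bits of a little-endian
32-bit word.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Unpack one AR30 pixel: 2-bit alpha in bits 30..31, then 10 bits each of
 * red, green and blue, with blue in the lowest bits (assumed layout). */
static void UnpackAR30(uint32_t px, int* a, int* r, int* g, int* b) {
  *b = px & 0x3ff;          /* bits 0..9   */
  *g = (px >> 10) & 0x3ff;  /* bits 10..19 */
  *r = (px >> 20) & 0x3ff;  /* bits 20..29 */
  *a = (px >> 30) & 0x3;    /* 0..3 maps to 0%/33%/66%/100% alpha */
}

/* NV12 buffer size: a full-size Y plane plus one interleaved UV plane at
 * half width and half height (420 subsampling), i.e. 12 bits per pixel. */
static size_t NV12BufferSize(int width, int height) {
  size_t y = (size_t)width * (size_t)height;
  size_t uv = 2u * (size_t)((width + 1) / 2) * (size_t)((height + 1) / 2);
  return y + uv;  /* width * height * 3 / 2 for even dimensions */
}

int main(void) {
  int a, r, g, b;
  UnpackAR30(0xFFF003FFu, &a, &r, &g, &b);
  printf("a=%d r=%d g=%d b=%d\n", a, r, g, b);     /* a=3 r=1023 g=0 b=1023 */
  printf("NV12 1280x720: %zu bytes\n", NV12BufferSize(1280, 720));  /* 1382400 */
  return 0;
}

For AB30 the red and blue fields swap; NV21 has the same plane sizes as NV12 but
stores V before U within the interleaved chroma plane.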
diff --git a/chromium/third_party/libyuv/docs/getting_started.md b/chromium/third_party/libyuv/docs/getting_started.md
index fefffce4f1f..09297b66a5c 100644
--- a/chromium/third_party/libyuv/docs/getting_started.md
+++ b/chromium/third_party/libyuv/docs/getting_started.md
@@ -138,11 +138,11 @@ mips
 
 arm disassembly:
 
-    third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
+    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
 
-    third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
+    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
 
-    third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
+    third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
 
 Running tests:
 
diff --git a/chromium/third_party/libyuv/include/libyuv/basic_types.h b/chromium/third_party/libyuv/include/libyuv/basic_types.h
index 7d98bb93f0e..01d9dfc7736 100644
--- a/chromium/third_party/libyuv/include/libyuv/basic_types.h
+++ b/chromium/third_party/libyuv/include/libyuv/basic_types.h
@@ -11,79 +11,33 @@
 #ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
 #define INCLUDE_LIBYUV_BASIC_TYPES_H_
 
-#include <stddef.h>  // for NULL, size_t
+#include <stddef.h>  // For size_t and NULL
+
+#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
+#define INT_TYPES_DEFINED
 
 #if defined(_MSC_VER) && (_MSC_VER < 1600)
 #include <sys/types.h>  // for uintptr_t on x86
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
+typedef unsigned int uint32_t;
+typedef int int32_t;
+typedef unsigned short uint16_t;
+typedef short int16_t;
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
 #else
-#include <stdint.h>  // for uintptr_t
-#endif
-
-#ifndef GG_LONGLONG
-#ifndef INT_TYPES_DEFINED
-#define INT_TYPES_DEFINED
-#ifdef COMPILER_MSVC
-typedef unsigned __int64 uint64;
-typedef __int64 int64;
-#ifndef INT64_C
-#define INT64_C(x) x##I64
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x##UI64
-#endif
-#define INT64_F "I64"
-#else  // COMPILER_MSVC
-#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long uint64;  // NOLINT
-typedef long int64;  // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x##L
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x##UL
-#endif
-#define INT64_F "l"
-#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long long uint64;  // NOLINT
-typedef long long int64;  // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x##LL
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x##ULL
-#endif
-#define INT64_F "ll"
-#endif  // __LP64__
-#endif  // COMPILER_MSVC
-typedef unsigned int uint32;
-typedef int int32;
-typedef unsigned short uint16;  // NOLINT
-typedef short int16;  // NOLINT
-typedef unsigned char uint8;
-typedef signed char int8;
+#include <stdint.h>  // for uintptr_t and C99 types
+#endif  // defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef uint64_t uint64; +typedef int64_t int64; +typedef uint32_t uint32; +typedef int32_t int32; +typedef uint16_t uint16; +typedef int16_t int16; +typedef uint8_t uint8; +typedef int8_t int8; #endif // INT_TYPES_DEFINED -#endif // GG_LONGLONG - -// Detect compiler is for x86 or x64. -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ - defined(_M_IX86) -#define CPU_X86 1 -#endif -// Detect compiler is for ARM. -#if defined(__arm__) || defined(_M_ARM) -#define CPU_ARM 1 -#endif - -#ifndef ALIGNP -#ifdef __cplusplus -#define ALIGNP(p, t) \ - reinterpret_cast<uint8*>( \ - ((reinterpret_cast<uintptr_t>(p) + ((t)-1)) & ~((t)-1))) -#else -#define ALIGNP(p, t) \ - (uint8*)((((uintptr_t)(p) + ((t)-1)) & ~((t)-1))) /* NOLINT */ -#endif -#endif #if !defined(LIBYUV_API) #if defined(_WIN32) || defined(__CYGWIN__) @@ -103,15 +57,9 @@ typedef signed char int8; #endif // __GNUC__ #endif // LIBYUV_API +// TODO(fbarchard): Remove bool macros. #define LIBYUV_BOOL int #define LIBYUV_FALSE 0 #define LIBYUV_TRUE 1 -// Visual C x86 or GCC little endian. -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ - defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define LIBYUV_LITTLE_ENDIAN -#endif - #endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ diff --git a/chromium/third_party/libyuv/include/libyuv/compare.h b/chromium/third_party/libyuv/include/libyuv/compare.h index a06eff2066f..3353ad71c68 100644 --- a/chromium/third_party/libyuv/include/libyuv/compare.h +++ b/chromium/third_party/libyuv/include/libyuv/compare.h @@ -20,80 +20,85 @@ extern "C" { // Compute a hash for specified memory. Seed of 5381 recommended. LIBYUV_API -uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed); +uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed); // Hamming Distance LIBYUV_API -uint64 ComputeHammingDistance(const uint8* src_a, - const uint8* src_b, - int count); +uint64_t ComputeHammingDistance(const uint8_t* src_a, + const uint8_t* src_b, + int count); // Scan an opaque argb image and return fourcc based on alpha offset. // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. LIBYUV_API -uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height); +uint32_t ARGBDetect(const uint8_t* argb, + int stride_argb, + int width, + int height); // Sum Square Error - used to compute Mean Square Error or PSNR. 
LIBYUV_API -uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, int count); +uint64_t ComputeSumSquareError(const uint8_t* src_a, + const uint8_t* src_b, + int count); LIBYUV_API -uint64 ComputeSumSquareErrorPlane(const uint8* src_a, - int stride_a, - const uint8* src_b, - int stride_b, - int width, - int height); +uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height); static const int kMaxPsnr = 128; LIBYUV_API -double SumSquareErrorToPsnr(uint64 sse, uint64 count); +double SumSquareErrorToPsnr(uint64_t sse, uint64_t count); LIBYUV_API -double CalcFramePsnr(const uint8* src_a, +double CalcFramePsnr(const uint8_t* src_a, int stride_a, - const uint8* src_b, + const uint8_t* src_b, int stride_b, int width, int height); LIBYUV_API -double I420Psnr(const uint8* src_y_a, +double I420Psnr(const uint8_t* src_y_a, int stride_y_a, - const uint8* src_u_a, + const uint8_t* src_u_a, int stride_u_a, - const uint8* src_v_a, + const uint8_t* src_v_a, int stride_v_a, - const uint8* src_y_b, + const uint8_t* src_y_b, int stride_y_b, - const uint8* src_u_b, + const uint8_t* src_u_b, int stride_u_b, - const uint8* src_v_b, + const uint8_t* src_v_b, int stride_v_b, int width, int height); LIBYUV_API -double CalcFrameSsim(const uint8* src_a, +double CalcFrameSsim(const uint8_t* src_a, int stride_a, - const uint8* src_b, + const uint8_t* src_b, int stride_b, int width, int height); LIBYUV_API -double I420Ssim(const uint8* src_y_a, +double I420Ssim(const uint8_t* src_y_a, int stride_y_a, - const uint8* src_u_a, + const uint8_t* src_u_a, int stride_u_a, - const uint8* src_v_a, + const uint8_t* src_v_a, int stride_v_a, - const uint8* src_y_b, + const uint8_t* src_y_b, int stride_y_b, - const uint8* src_u_b, + const uint8_t* src_u_b, int stride_u_b, - const uint8* src_v_b, + const uint8_t* src_v_b, int stride_v_b, int width, int height); diff --git a/chromium/third_party/libyuv/include/libyuv/compare_row.h b/chromium/third_party/libyuv/include/libyuv/compare_row.h index 2e5ebe508d1..72ee740600a 100644 --- a/chromium/third_party/libyuv/include/libyuv/compare_row.h +++ b/chromium/third_party/libyuv/include/libyuv/compare_row.h @@ -18,17 +18,20 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif - // Visual C 2012 required for AVX2. 
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ _MSC_VER >= 1700 @@ -87,22 +90,44 @@ extern "C" { #define HAS_SUMSQUAREERROR_MSA #endif -uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count); -uint32 HammingDistance_SSE42(const uint8* src_a, const uint8* src_b, int count); -uint32 HammingDistance_SSSE3(const uint8* src_a, const uint8* src_b, int count); -uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count); -uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count); -uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count); - -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); -uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count); - -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); +uint32_t HammingDistance_C(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_SSSE3(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_AVX2(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count); + +uint32_t SumSquareError_C(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_SSE2(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_AVX2(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count); + +uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed); +uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed); +uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libyuv/include/libyuv/convert.h b/chromium/third_party/libyuv/include/libyuv/convert.h index d310f8493bf..d12ef24f799 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert.h +++ b/chromium/third_party/libyuv/include/libyuv/convert.h @@ -27,34 +27,34 @@ extern "C" { // Convert I444 to I420. LIBYUV_API -int I444ToI420(const uint8* src_y, +int I444ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert I422 to I420. 
LIBYUV_API -int I422ToI420(const uint8* src_y, +int I422ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); @@ -62,17 +62,17 @@ int I422ToI420(const uint8* src_y, // Copy I420 to I420. #define I420ToI420 I420Copy LIBYUV_API -int I420Copy(const uint8* src_y, +int I420Copy(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); @@ -81,17 +81,17 @@ int I420Copy(const uint8* src_y, #define I010ToI010 I010Copy #define H010ToH010 I010Copy LIBYUV_API -int I010Copy(const uint16* src_y, +int I010Copy(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, - uint16* dst_u, + uint16_t* dst_u, int dst_stride_u, - uint16* dst_v, + uint16_t* dst_v, int dst_stride_v, int width, int height); @@ -99,30 +99,30 @@ int I010Copy(const uint16* src_y, // Convert 10 bit YUV to 8 bit #define H010ToH420 I010ToI420 LIBYUV_API -int I010ToI420(const uint16* src_y, +int I010ToI420(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert I400 (grey) to I420. LIBYUV_API -int I400ToI420(const uint8* src_y, +int I400ToI420(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); @@ -131,204 +131,204 @@ int I400ToI420(const uint8* src_y, // Convert NV12 to I420. LIBYUV_API -int NV12ToI420(const uint8* src_y, +int NV12ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert NV21 to I420. LIBYUV_API -int NV21ToI420(const uint8* src_y, +int NV21ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_vu, + const uint8_t* src_vu, int src_stride_vu, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert YUY2 to I420. LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, +int YUY2ToI420(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert UYVY to I420. 
LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, +int UYVYToI420(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert M420 to I420. LIBYUV_API -int M420ToI420(const uint8* src_m420, +int M420ToI420(const uint8_t* src_m420, int src_stride_m420, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert Android420 to I420. LIBYUV_API -int Android420ToI420(const uint8* src_y, +int Android420ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - int pixel_stride_uv, - uint8* dst_y, + int src_pixel_stride_uv, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // ARGB little endian (bgra in memory) to I420. LIBYUV_API -int ARGBToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int ARGBToI420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // BGRA little endian (argb in memory) to I420. LIBYUV_API -int BGRAToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int BGRAToI420(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // ABGR little endian (rgba in memory) to I420. LIBYUV_API -int ABGRToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int ABGRToI420(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // RGBA little endian (abgr in memory) to I420. LIBYUV_API -int RGBAToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int RGBAToI420(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB little endian (bgr in memory) to I420. LIBYUV_API -int RGB24ToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int RGB24ToI420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB big endian (rgb in memory) to I420. LIBYUV_API -int RAWToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int RAWToI420(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB16 (RGBP fourcc) little endian to I420. 
LIBYUV_API -int RGB565ToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int RGB565ToI420(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB15 (RGBO fourcc) little endian to I420. LIBYUV_API -int ARGB1555ToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int ARGB1555ToI420(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB12 (R444 fourcc) little endian to I420. LIBYUV_API -int ARGB4444ToI420(const uint8* src_frame, - int src_stride_frame, - uint8* dst_y, +int ARGB4444ToI420(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); @@ -337,13 +337,13 @@ int ARGB4444ToI420(const uint8* src_frame, // src_width/height provided by capture. // dst_width/height for clipping determine final size. LIBYUV_API -int MJPGToI420(const uint8* sample, +int MJPGToI420(const uint8_t* sample, size_t sample_size, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int src_width, int src_height, @@ -352,7 +352,10 @@ int MJPGToI420(const uint8* sample, // Query size of MJPG in pixels. LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height); +int MJPGSize(const uint8_t* sample, + size_t sample_size, + int* width, + int* height); #endif // Convert camera sample to I420 with cropping, rotation and vertical flip. @@ -375,16 +378,16 @@ int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height); // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. LIBYUV_API -int ConvertToI420(const uint8* src_frame, - size_t src_size, - uint8* dst_y, +int ConvertToI420(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int crop_x, int crop_y, @@ -393,7 +396,7 @@ int ConvertToI420(const uint8* src_frame, int crop_width, int crop_height, enum RotationMode rotation, - uint32 format); + uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libyuv/include/libyuv/convert_argb.h b/chromium/third_party/libyuv/include/libyuv/convert_argb.h index b8b57cb12b3..cd4a611de51 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert_argb.h +++ b/chromium/third_party/libyuv/include/libyuv/convert_argb.h @@ -30,167 +30,167 @@ extern "C" { // Copy ARGB to ARGB. LIBYUV_API -int ARGBCopy(const uint8* src_argb, +int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I420 to ARGB. 
LIBYUV_API -int I420ToARGB(const uint8* src_y, +int I420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Duplicate prototype for function in convert_from.h for remoting. LIBYUV_API -int I420ToABGR(const uint8* src_y, +int I420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I010 to ARGB. LIBYUV_API -int I010ToARGB(const uint16* src_y, +int I010ToARGB(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I010 to ARGB. LIBYUV_API -int I010ToARGB(const uint16* src_y, +int I010ToARGB(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I010 to ABGR. LIBYUV_API -int I010ToABGR(const uint16* src_y, +int I010ToABGR(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H010 to ARGB. LIBYUV_API -int H010ToARGB(const uint16* src_y, +int H010ToARGB(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert H010 to ABGR. LIBYUV_API -int H010ToABGR(const uint16* src_y, +int H010ToABGR(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I422 to ARGB. LIBYUV_API -int I422ToARGB(const uint8* src_y, +int I422ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I444 to ARGB. LIBYUV_API -int I444ToARGB(const uint8* src_y, +int I444ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J444 to ARGB. LIBYUV_API -int J444ToARGB(const uint8* src_y, +int J444ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I444 to ABGR. 
LIBYUV_API -int I444ToABGR(const uint8* src_y, +int I444ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I420 with Alpha to preattenuated ARGB. LIBYUV_API -int I420AlphaToARGB(const uint8* src_y, +int I420AlphaToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - const uint8* src_a, + const uint8_t* src_a, int src_stride_a, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, @@ -198,15 +198,15 @@ int I420AlphaToARGB(const uint8* src_y, // Convert I420 with Alpha to preattenuated ABGR. LIBYUV_API -int I420AlphaToABGR(const uint8* src_y, +int I420AlphaToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - const uint8* src_a, + const uint8_t* src_a, int src_stride_a, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height, @@ -214,18 +214,18 @@ int I420AlphaToABGR(const uint8* src_y, // Convert I400 (grey) to ARGB. Reverse of ARGBToI400. LIBYUV_API -int I400ToARGB(const uint8* src_y, +int I400ToARGB(const uint8_t* src_y, int src_stride_y, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J400 (jpeg grey) to ARGB. LIBYUV_API -int J400ToARGB(const uint8* src_y, +int J400ToARGB(const uint8_t* src_y, int src_stride_y, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); @@ -235,227 +235,266 @@ int J400ToARGB(const uint8* src_y, // Convert NV12 to ARGB. LIBYUV_API -int NV12ToARGB(const uint8* src_y, +int NV12ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert NV21 to ARGB. LIBYUV_API -int NV21ToARGB(const uint8* src_y, +int NV21ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_vu, + const uint8_t* src_vu, int src_stride_vu, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert NV12 to ABGR. -int NV12ToABGR(const uint8* src_y, +int NV12ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert NV21 to ABGR. LIBYUV_API -int NV21ToABGR(const uint8* src_y, +int NV21ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, - int src_stride_uv, - uint8* dst_abgr, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert M420 to ARGB. LIBYUV_API -int M420ToARGB(const uint8* src_m420, +int M420ToARGB(const uint8_t* src_m420, int src_stride_m420, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert YUY2 to ARGB. LIBYUV_API -int YUY2ToARGB(const uint8* src_yuy2, +int YUY2ToARGB(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert UYVY to ARGB. 
LIBYUV_API -int UYVYToARGB(const uint8* src_uyvy, +int UYVYToARGB(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J420 to ARGB. LIBYUV_API -int J420ToARGB(const uint8* src_y, +int J420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J422 to ARGB. LIBYUV_API -int J422ToARGB(const uint8* src_y, +int J422ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J420 to ABGR. LIBYUV_API -int J420ToABGR(const uint8* src_y, +int J420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert J422 to ABGR. LIBYUV_API -int J422ToABGR(const uint8* src_y, +int J422ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H420 to ARGB. LIBYUV_API -int H420ToARGB(const uint8* src_y, +int H420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert H422 to ARGB. LIBYUV_API -int H422ToARGB(const uint8* src_y, +int H422ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert H420 to ABGR. LIBYUV_API -int H420ToABGR(const uint8* src_y, +int H420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H422 to ABGR. LIBYUV_API -int H422ToABGR(const uint8* src_y, +int H422ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H010 to ARGB. LIBYUV_API -int H010ToARGB(const uint16* src_y, +int H010ToARGB(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); +// Convert I010 to AR30. 
+LIBYUV_API +int I010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + // Convert H010 to AR30. LIBYUV_API -int H010ToAR30(const uint16* src_y, +int H010ToAR30(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); +// Convert I010 to AB30. +LIBYUV_API +int I010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + +// Convert H010 to AB30. +LIBYUV_API +int H010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + // BGRA little endian (argb in memory) to ARGB. LIBYUV_API -int BGRAToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int BGRAToARGB(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // ABGR little endian (rgba in memory) to ARGB. LIBYUV_API -int ABGRToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int ABGRToARGB(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGBA little endian (abgr in memory) to ARGB. LIBYUV_API -int RGBAToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int RGBAToARGB(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); @@ -465,65 +504,76 @@ int RGBAToARGB(const uint8* src_frame, // RGB little endian (bgr in memory) to ARGB. LIBYUV_API -int RGB24ToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int RGB24ToARGB(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB big endian (rgb in memory) to ARGB. LIBYUV_API -int RAWToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int RAWToARGB(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB16 (RGBP fourcc) little endian to ARGB. LIBYUV_API -int RGB565ToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int RGB565ToARGB(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB15 (RGBO fourcc) little endian to ARGB. LIBYUV_API -int ARGB1555ToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int ARGB1555ToARGB(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB12 (R444 fourcc) little endian to ARGB. LIBYUV_API -int ARGB4444ToARGB(const uint8* src_frame, - int src_stride_frame, - uint8* dst_argb, +int ARGB4444ToARGB(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert AR30 To ARGB. 
LIBYUV_API -int AR30ToARGB(const uint8* src_ar30, +int AR30ToARGB(const uint8_t* src_ar30, int src_stride_ar30, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); +#define AB30ToABGR + +// Convert AR30 To ABGR. +LIBYUV_API +int AR30ToABGR(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + #ifdef HAVE_JPEG // src_width/height provided by capture // dst_width/height for clipping determine final size. LIBYUV_API -int MJPGToARGB(const uint8* sample, +int MJPGToARGB(const uint8_t* sample, size_t sample_size, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int src_width, int src_height, @@ -533,34 +583,34 @@ int MJPGToARGB(const uint8* sample, // Convert Android420 to ARGB. LIBYUV_API -int Android420ToARGB(const uint8* src_y, +int Android420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert Android420 to ABGR. LIBYUV_API -int Android420ToABGR(const uint8* src_y, +int Android420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert camera sample to ARGB with cropping, rotation and vertical flip. -// "src_size" is needed to parse MJPG. +// "sample_size" is needed to parse MJPG. // "dst_stride_argb" number of bytes in a row of the dst_argb plane. // Normally this would be the same as dst_width, with recommended alignment // to 16 bytes for better efficiency. @@ -579,12 +629,12 @@ int Android420ToABGR(const uint8* src_y, // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. LIBYUV_API -int ConvertToARGB(const uint8* src_frame, - size_t src_size, - uint8* dst_argb, +int ConvertToARGB(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_argb, int dst_stride_argb, int crop_x, int crop_y, @@ -593,7 +643,7 @@ int ConvertToARGB(const uint8* src_frame, int crop_width, int crop_height, enum RotationMode rotation, - uint32 format); + uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libyuv/include/libyuv/convert_from.h b/chromium/third_party/libyuv/include/libyuv/convert_from.h index b5a422903a5..5cd8a4bfc04 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert_from.h +++ b/chromium/third_party/libyuv/include/libyuv/convert_from.h @@ -23,231 +23,231 @@ extern "C" { // Convert 8 bit YUV to 10 bit. 
#define H420ToH010 I420ToI010 -int I420ToI010(const uint8* src_y, +int I420ToI010(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, - uint16* dst_u, + uint16_t* dst_u, int dst_stride_u, - uint16* dst_v, + uint16_t* dst_v, int dst_stride_v, int width, int height); LIBYUV_API -int I420ToI422(const uint8* src_y, +int I420ToI422(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); LIBYUV_API -int I420ToI444(const uint8* src_y, +int I420ToI444(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. LIBYUV_API -int I400Copy(const uint8* src_y, +int I400Copy(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); LIBYUV_API -int I420ToNV12(const uint8* src_y, +int I420ToNV12(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height); LIBYUV_API -int I420ToNV21(const uint8* src_y, +int I420ToNV21(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_vu, + uint8_t* dst_vu, int dst_stride_vu, int width, int height); LIBYUV_API -int I420ToYUY2(const uint8* src_y, +int I420ToYUY2(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_yuy2, + int dst_stride_yuy2, int width, int height); LIBYUV_API -int I420ToUYVY(const uint8* src_y, +int I420ToUYVY(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_uyvy, + int dst_stride_uyvy, int width, int height); LIBYUV_API -int I420ToARGB(const uint8* src_y, +int I420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); LIBYUV_API -int I420ToBGRA(const uint8* src_y, +int I420ToBGRA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, - int dst_stride_argb, + uint8_t* dst_bgra, + int dst_stride_bgra, int 
width, int height); LIBYUV_API -int I420ToABGR(const uint8* src_y, +int I420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, - int dst_stride_argb, + uint8_t* dst_abgr, + int dst_stride_abgr, int width, int height); LIBYUV_API -int I420ToRGBA(const uint8* src_y, +int I420ToRGBA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, int width, int height); LIBYUV_API -int I420ToRGB24(const uint8* src_y, +int I420ToRGB24(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_rgb24, + int dst_stride_rgb24, int width, int height); LIBYUV_API -int I420ToRAW(const uint8* src_y, +int I420ToRAW(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_raw, + int dst_stride_raw, int width, int height); LIBYUV_API -int H420ToRGB24(const uint8* src_y, +int H420ToRGB24(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_rgb24, + int dst_stride_rgb24, int width, int height); LIBYUV_API -int H420ToRAW(const uint8* src_y, +int H420ToRAW(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_raw, + int dst_stride_raw, int width, int height); LIBYUV_API -int I420ToRGB565(const uint8* src_y, +int I420ToRGB565(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_rgb565, + int dst_stride_rgb565, int width, int height); LIBYUV_API -int I422ToRGB565(const uint8* src_y, +int I422ToRGB565(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_rgb565, + int dst_stride_rgb565, int width, int height); @@ -256,50 +256,64 @@ int I422ToRGB565(const uint8* src_y, // The order of the dither matrix is first byte is upper left. 
LIBYUV_API -int I420ToRGB565Dither(const uint8* src_y, +int I420ToRGB565Dither(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, - const uint8* dither4x4, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const uint8_t* dither4x4, int width, int height); LIBYUV_API -int I420ToARGB1555(const uint8* src_y, +int I420ToARGB1555(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_argb1555, + int dst_stride_argb1555, int width, int height); LIBYUV_API -int I420ToARGB4444(const uint8* src_y, +int I420ToARGB4444(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_argb4444, + int dst_stride_argb4444, int width, int height); + // Convert I420 to AR30. LIBYUV_API -int I420ToAR30(const uint8* src_y, +int I420ToAR30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert H420 to AR30. +LIBYUV_API +int H420ToAR30(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); @@ -308,17 +322,17 @@ int I420ToAR30(const uint8* src_y, // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the // buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. LIBYUV_API -int ConvertFromI420(const uint8* y, +int ConvertFromI420(const uint8_t* y, int y_stride, - const uint8* u, + const uint8_t* u, int u_stride, - const uint8* v, + const uint8_t* v, int v_stride, - uint8* dst_sample, + uint8_t* dst_sample, int dst_sample_stride, int width, int height, - uint32 format); + uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h b/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h index 4d613502a16..857b46c5611 100644 --- a/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h +++ b/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h @@ -21,72 +21,81 @@ extern "C" { // Copy ARGB to ARGB. #define ARGBToARGB ARGBCopy LIBYUV_API -int ARGBCopy(const uint8* src_argb, +int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert ARGB To BGRA. LIBYUV_API -int ARGBToBGRA(const uint8* src_argb, +int ARGBToBGRA(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_bgra, + uint8_t* dst_bgra, int dst_stride_bgra, int width, int height); // Convert ARGB To ABGR. LIBYUV_API -int ARGBToABGR(const uint8* src_argb, +int ARGBToABGR(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert ARGB To RGBA. 
LIBYUV_API -int ARGBToRGBA(const uint8* src_argb, +int ARGBToRGBA(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, int width, int height); +// Convert ABGR To AR30. +LIBYUV_API +int ABGRToAR30(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + // Convert ARGB To AR30. LIBYUV_API -int ARGBToAR30(const uint8* src_argb, +int ARGBToAR30(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); // Convert ARGB To RGB24. LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, +int ARGBToRGB24(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgb24, + uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); // Convert ARGB To RAW. LIBYUV_API -int ARGBToRAW(const uint8* src_argb, +int ARGBToRAW(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgb, - int dst_stride_rgb, + uint8_t* dst_raw, + int dst_stride_raw, int width, int height); // Convert ARGB To RGB565. LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, +int ARGBToRGB565(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height); @@ -95,173 +104,173 @@ int ARGBToRGB565(const uint8* src_argb, // Values in dither matrix from 0 to 7 recommended. // The order of the dither matrix is first byte is upper left. // TODO(fbarchard): Consider pointer to 2d array for dither4x4. -// const uint8(*dither)[4][4]; +// const uint8_t(*dither)[4][4]; LIBYUV_API -int ARGBToRGB565Dither(const uint8* src_argb, +int ARGBToRGB565Dither(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, - const uint8* dither4x4, + const uint8_t* dither4x4, int width, int height); // Convert ARGB To ARGB1555. LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, +int ARGBToARGB1555(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb1555, + uint8_t* dst_argb1555, int dst_stride_argb1555, int width, int height); // Convert ARGB To ARGB4444. LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, +int ARGBToARGB4444(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb4444, + uint8_t* dst_argb4444, int dst_stride_argb4444, int width, int height); // Convert ARGB To I444. LIBYUV_API -int ARGBToI444(const uint8* src_argb, +int ARGBToI444(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB To I422. LIBYUV_API -int ARGBToI422(const uint8* src_argb, +int ARGBToI422(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB To I420. (also in convert.h) LIBYUV_API -int ARGBToI420(const uint8* src_argb, +int ARGBToI420(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB to J420. (JPeg full range I420). 
LIBYUV_API -int ARGBToJ420(const uint8* src_argb, +int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yj, + uint8_t* dst_yj, int dst_stride_yj, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB to J422. LIBYUV_API -int ARGBToJ422(const uint8* src_argb, +int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yj, + uint8_t* dst_yj, int dst_stride_yj, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB to J400. (JPeg full range). LIBYUV_API -int ARGBToJ400(const uint8* src_argb, +int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yj, + uint8_t* dst_yj, int dst_stride_yj, int width, int height); // Convert ARGB to I400. LIBYUV_API -int ARGBToI400(const uint8* src_argb, +int ARGBToI400(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); // Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB) LIBYUV_API -int ARGBToG(const uint8* src_argb, +int ARGBToG(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_g, + uint8_t* dst_g, int dst_stride_g, int width, int height); // Convert ARGB To NV12. LIBYUV_API -int ARGBToNV12(const uint8* src_argb, +int ARGBToNV12(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height); // Convert ARGB To NV21. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, +int ARGBToNV21(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_vu, + uint8_t* dst_vu, int dst_stride_vu, int width, int height); // Convert ARGB To NV21. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, +int ARGBToNV21(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_vu, + uint8_t* dst_vu, int dst_stride_vu, int width, int height); // Convert ARGB To YUY2. LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, +int ARGBToYUY2(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yuy2, + uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height); // Convert ARGB To UYVY. LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, +int ARGBToUYVY(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_uyvy, + uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height); diff --git a/chromium/third_party/libyuv/include/libyuv/cpu_id.h b/chromium/third_party/libyuv/include/libyuv/cpu_id.h index 14f735f57b2..91480c68b01 100644 --- a/chromium/third_party/libyuv/include/libyuv/cpu_id.h +++ b/chromium/third_party/libyuv/include/libyuv/cpu_id.h @@ -84,7 +84,7 @@ int MaskCpuFlags(int enable_flags); // eax is the info type that you want. // ecx is typically the cpu number, and should normally be zero. 
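A sketch of driving these cpu_id.h entry points (the CpuId declaration itself follows just below). The MaskCpuFlags semantics are an assumption taken from this header's documentation elsewhere: MaskCpuFlags(1) disables all cpu-specific optimizations and MaskCpuFlags(-1) re-enables full detection; the cpu_info register order is likewise assumed (see source/cpu_id.cc):

  // Force the portable C paths, run a reference pass, then restore SIMD.
  libyuv::MaskCpuFlags(1);     // assumption: disable all optimizations
  RunReferenceConversion();    // hypothetical caller-side workload
  libyuv::MaskCpuFlags(-1);    // assumption: re-enable auto-detection

  // Raw CPUID query: info type 1 returns feature bits; cpu_info is
  // assumed to receive eax, ebx, ecx, edx in that order.
  int cpu_info[4];
  libyuv::CpuId(1, 0, cpu_info);
  bool has_ssse3 = (cpu_info[2] & (1 << 9)) != 0;  // CPUID.1:ECX bit 9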
LIBYUV_API -void CpuId(int eax, int ecx, int* cpu_info); +void CpuId(int info_eax, int info_ecx, int* cpu_info); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libyuv/include/libyuv/macros_msa.h b/chromium/third_party/libyuv/include/libyuv/macros_msa.h index 61be352e3af..921eb0714d6 100644 --- a/chromium/third_party/libyuv/include/libyuv/macros_msa.h +++ b/chromium/third_party/libyuv/include/libyuv/macros_msa.h @@ -16,38 +16,38 @@ #include <stdint.h> #if (__mips_isa_rev >= 6) -#define LW(psrc) \ - ({ \ - uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \ - uint32 val_m; \ - asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ - : [val_m] "=r"(val_m) \ - : [psrc_lw_m] "m"(*psrc_lw_m)); \ - val_m; \ +#define LW(psrc) \ + ({ \ + uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \ + uint32_t val_m; \ + asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ + : [val_m] "=r"(val_m) \ + : [psrc_lw_m] "m"(*psrc_lw_m)); \ + val_m; \ }) #if (__mips == 64) -#define LD(psrc) \ - ({ \ - uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ - uint64 val_m = 0; \ - asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ - : [val_m] "=r"(val_m) \ - : [psrc_ld_m] "m"(*psrc_ld_m)); \ - val_m; \ +#define LD(psrc) \ + ({ \ + uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ + uint64_t val_m = 0; \ + asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ + : [val_m] "=r"(val_m) \ + : [psrc_ld_m] "m"(*psrc_ld_m)); \ + val_m; \ }) #else // !(__mips == 64) -#define LD(psrc) \ - ({ \ - uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ - uint32 val0_m, val1_m; \ - uint64 val_m = 0; \ - val0_m = LW(psrc_ld_m); \ - val1_m = LW(psrc_ld_m + 4); \ - val_m = (uint64)(val1_m); /* NOLINT */ \ - val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ - val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ - val_m; \ +#define LD(psrc) \ + ({ \ + uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ + uint32_t val0_m, val1_m; \ + uint64_t val_m = 0; \ + val0_m = LW(psrc_ld_m); \ + val1_m = LW(psrc_ld_m + 4); \ + val_m = (uint64_t)(val1_m); /* NOLINT */ \ + val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ + val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ + val_m; \ }) #endif // (__mips == 64) @@ -81,38 +81,38 @@ }) #endif // !(__mips == 64) #else // !(__mips_isa_rev >= 6) -#define LW(psrc) \ - ({ \ - uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \ - uint32 val_m; \ - asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \ - : [val_m] "=r"(val_m) \ - : [psrc_lw_m] "m"(*psrc_lw_m)); \ - val_m; \ +#define LW(psrc) \ + ({ \ + uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \ + uint32_t val_m; \ + asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \ + : [val_m] "=r"(val_m) \ + : [psrc_lw_m] "m"(*psrc_lw_m)); \ + val_m; \ }) #if (__mips == 64) -#define LD(psrc) \ - ({ \ - uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ - uint64 val_m = 0; \ - asm volatile("uld %[val_m], %[psrc_ld_m] \n" \ - : [val_m] "=r"(val_m) \ - : [psrc_ld_m] "m"(*psrc_ld_m)); \ - val_m; \ +#define LD(psrc) \ + ({ \ + uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ + uint64_t val_m = 0; \ + asm volatile("uld %[val_m], %[psrc_ld_m] \n" \ + : [val_m] "=r"(val_m) \ + : [psrc_ld_m] "m"(*psrc_ld_m)); \ + val_m; \ }) #else // !(__mips == 64) -#define LD(psrc) \ - ({ \ - uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ - uint32 val0_m, val1_m; \ - uint64 val_m = 0; \ - val0_m = LW(psrc_ld_m); \ - val1_m = LW(psrc_ld_m + 4); \ - val_m = (uint64)(val1_m); /* NOLINT */ \ - val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* 
NOLINT */ \ - val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ - val_m; \ +#define LD(psrc) \ + ({ \ + uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \ + uint32_t val0_m, val1_m; \ + uint64_t val_m = 0; \ + val0_m = LW(psrc_ld_m); \ + val1_m = LW(psrc_ld_m + 4); \ + val_m = (uint64_t)(val1_m); /* NOLINT */ \ + val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ + val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ + val_m; \ }) #endif // (__mips == 64) diff --git a/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h index 8a4f282205b..6c12633387f 100644 --- a/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h +++ b/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h @@ -26,13 +26,13 @@ namespace libyuv { extern "C" { #endif -LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size); +LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size); #ifdef __cplusplus } // extern "C" #endif -static const uint32 kUnknownDataSize = 0xFFFFFFFF; +static const uint32_t kUnknownDataSize = 0xFFFFFFFF; enum JpegSubsamplingType { kJpegYuv420, @@ -43,7 +43,7 @@ enum JpegSubsamplingType { }; struct Buffer { - const uint8* data; + const uint8_t* data; int len; }; @@ -65,7 +65,7 @@ struct SetJmpErrorMgr; class LIBYUV_API MJpegDecoder { public: typedef void (*CallbackFunction)(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows); @@ -85,7 +85,7 @@ class LIBYUV_API MJpegDecoder { // If return value is LIBYUV_TRUE, then the values for all the following // getters are populated. // src_len is the size of the compressed mjpeg frame in bytes. - LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len); + LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len); // Returns width of the last loaded frame in pixels. int GetWidth(); @@ -138,7 +138,7 @@ class LIBYUV_API MJpegDecoder { // at least GetComponentSize(i). The pointers in planes are incremented // to point to after the end of the written data. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. - LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height); + LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height); // Decodes the entire image and passes the data via repeated calls to a // callback function. Each call will get the data for a whole number of @@ -162,14 +162,14 @@ class LIBYUV_API MJpegDecoder { LIBYUV_BOOL StartDecode(); LIBYUV_BOOL FinishDecode(); - void SetScanlinePointers(uint8** data); + void SetScanlinePointers(uint8_t** data); LIBYUV_BOOL DecodeImcuRow(); int GetComponentScanlinePadding(int component); // A buffer holding the input data for a frame. - Buffer buf_; - BufferVector buf_vec_; + Buffer buf_{}; + BufferVector buf_vec_{}; jpeg_decompress_struct* decompress_struct_; jpeg_source_mgr* source_mgr_; @@ -181,12 +181,12 @@ class LIBYUV_API MJpegDecoder { // Temporaries used to point to scanline outputs. int num_outbufs_; // Outermost size of all arrays below. - uint8*** scanlines_; - int* scanlines_sizes_; + uint8_t*** scanlines_{}; + int* scanlines_sizes_{}; // Temporary buffer used for decoding when we can't decode directly to the // output buffers. Large enough for just one iMCU row. 
- uint8** databuf_; - int* databuf_strides_; + uint8_t** databuf_{}; + int* databuf_strides_{}; }; } // namespace libyuv diff --git a/chromium/third_party/libyuv/include/libyuv/planar_functions.h b/chromium/third_party/libyuv/include/libyuv/planar_functions.h index 653b0619710..324bb1ed0ea 100644 --- a/chromium/third_party/libyuv/include/libyuv/planar_functions.h +++ b/chromium/third_party/libyuv/include/libyuv/planar_functions.h @@ -22,36 +22,54 @@ namespace libyuv { extern "C" { #endif +// TODO(fbarchard): Move cpu macros to row.h +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) +#define LIBYUV_DISABLE_X86 +#endif +// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define LIBYUV_DISABLE_X86 +#endif +#endif +// The following are available on all x86 platforms: +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) +#define HAS_ARGBAFFINEROW_SSE2 +#endif + // Copy a plane of data. LIBYUV_API -void CopyPlane(const uint8* src_y, +void CopyPlane(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); LIBYUV_API -void CopyPlane_16(const uint16* src_y, +void CopyPlane_16(const uint16_t* src_y, int src_stride_y, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, int width, int height); LIBYUV_API -void Convert16To8Plane(const uint16* src_y, +void Convert16To8Plane(const uint16_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int scale, // 16384 for 10 bits int width, int height); LIBYUV_API -void Convert8To16Plane(const uint8* src_y, +void Convert8To16Plane(const uint8_t* src_y, int src_stride_y, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, int scale, // 1024 for 10 bits int width, @@ -59,65 +77,65 @@ void Convert8To16Plane(const uint8* src_y, // Set a plane of data to a 32 bit value. LIBYUV_API -void SetPlane(uint8* dst_y, +void SetPlane(uint8_t* dst_y, int dst_stride_y, int width, int height, - uint32 value); + uint32_t value); // Split interleaved UV plane into separate U and V planes. LIBYUV_API -void SplitUVPlane(const uint8* src_uv, +void SplitUVPlane(const uint8_t* src_uv, int src_stride_uv, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Merge separate U and V planes into one interleaved UV plane. LIBYUV_API -void MergeUVPlane(const uint8* src_u, +void MergeUVPlane(const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height); // Split interleaved RGB plane into separate R, G and B planes. LIBYUV_API -void SplitRGBPlane(const uint8* src_rgb, +void SplitRGBPlane(const uint8_t* src_rgb, int src_stride_rgb, - uint8* dst_r, + uint8_t* dst_r, int dst_stride_r, - uint8* dst_g, + uint8_t* dst_g, int dst_stride_g, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height); // Merge separate R, G and B planes into one interleaved RGB plane. 
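A worked example for the scale parameters noted above. The exact rounding is an assumption (see source/row_common.cc for the authoritative code): Convert16To8Plane computes approximately dst = (src * scale) >> 16, so 10-bit samples (0..1023) with scale 16384 land in 0..255, e.g. 1023 * 16384 >> 16 == 255. Buffers and dimensions are assumed in scope:

  // Narrow a 10-bit luma plane (stored in 16-bit shorts) to 8 bits.
  libyuv::Convert16To8Plane(src_y10, src_stride_y10,
                            dst_y8, dst_stride_y8,
                            16384,  // 16384/65536 == 1/4: 10 bits -> 8 bits
                            width, height);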
LIBYUV_API -void MergeRGBPlane(const uint8* src_r, +void MergeRGBPlane(const uint8_t* src_r, int src_stride_r, - const uint8* src_g, + const uint8_t* src_g, int src_stride_g, - const uint8* src_b, + const uint8_t* src_b, int src_stride_b, - uint8* dst_rgb, + uint8_t* dst_rgb, int dst_stride_rgb, int width, int height); // Copy I400. Supports inverting. LIBYUV_API -int I400ToI400(const uint8* src_y, +int I400ToI400(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); @@ -127,17 +145,17 @@ int I400ToI400(const uint8* src_y, // Copy I422 to I422. #define I422ToI422 I422Copy LIBYUV_API -int I422Copy(const uint8* src_y, +int I422Copy(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); @@ -145,84 +163,84 @@ int I422Copy(const uint8* src_y, // Copy I444 to I444. #define I444ToI444 I444Copy LIBYUV_API -int I444Copy(const uint8* src_y, +int I444Copy(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert YUY2 to I422. LIBYUV_API -int YUY2ToI422(const uint8* src_yuy2, +int YUY2ToI422(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert UYVY to I422. LIBYUV_API -int UYVYToI422(const uint8* src_uyvy, +int UYVYToI422(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); LIBYUV_API -int YUY2ToNV12(const uint8* src_yuy2, +int YUY2ToNV12(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height); LIBYUV_API -int UYVYToNV12(const uint8* src_uyvy, +int UYVYToNV12(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height); LIBYUV_API -int YUY2ToY(const uint8* src_yuy2, +int YUY2ToY(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); // Convert I420 to I400. (calls CopyPlane ignoring u/v). LIBYUV_API -int I420ToI400(const uint8* src_y, +int I420ToI400(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); @@ -233,17 +251,17 @@ int I420ToI400(const uint8* src_y, // I420 mirror. 
LIBYUV_API -int I420Mirror(const uint8* src_y, +int I420Mirror(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); @@ -254,9 +272,9 @@ int I420Mirror(const uint8* src_y, // I400 mirror. A single plane is mirrored horizontally. // Pass negative height to achieve 180 degree rotation. LIBYUV_API -int I400Mirror(const uint8* src_y, +int I400Mirror(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); @@ -266,20 +284,20 @@ int I400Mirror(const uint8* src_y, // ARGB mirror. LIBYUV_API -int ARGBMirror(const uint8* src_argb, +int ARGBMirror(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert NV12 to RGB565. LIBYUV_API -int NV12ToRGB565(const uint8* src_y, +int NV12ToRGB565(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height); @@ -287,39 +305,39 @@ int NV12ToRGB565(const uint8* src_y, // I422ToARGB is in convert_argb.h // Convert I422 to BGRA. LIBYUV_API -int I422ToBGRA(const uint8* src_y, +int I422ToBGRA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_bgra, + uint8_t* dst_bgra, int dst_stride_bgra, int width, int height); // Convert I422 to ABGR. LIBYUV_API -int I422ToABGR(const uint8* src_y, +int I422ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I422 to RGBA. LIBYUV_API -int I422ToRGBA(const uint8* src_y, +int I422ToRGBA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, int width, int height); @@ -328,20 +346,20 @@ int I422ToRGBA(const uint8* src_y, #define RGB24ToRAW RAWToRGB24 LIBYUV_API -int RAWToRGB24(const uint8* src_raw, +int RAWToRGB24(const uint8_t* src_raw, int src_stride_raw, - uint8* dst_rgb24, + uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); // Draw a rectangle into I420. LIBYUV_API -int I420Rect(uint8* dst_y, +int I420Rect(uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int x, int y, @@ -353,38 +371,38 @@ int I420Rect(uint8* dst_y, // Draw a rectangle into ARGB. LIBYUV_API -int ARGBRect(uint8* dst_argb, +int ARGBRect(uint8_t* dst_argb, int dst_stride_argb, - int x, - int y, + int dst_x, + int dst_y, int width, int height, - uint32 value); + uint32_t value); // Convert ARGB to gray scale ARGB. LIBYUV_API -int ARGBGrayTo(const uint8* src_argb, +int ARGBGrayTo(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Make a rectangle of ARGB gray scale. 
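A sketch of the negative-height convention documented above: a horizontal mirror combined with height < 0 (which inverts the image vertically) yields a 180-degree rotation of the plane.

  // Rotate a single I400 plane by 180 degrees.
  libyuv::I400Mirror(src_y, src_stride_y, dst_y, dst_stride_y,
                     width, -height);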
LIBYUV_API -int ARGBGray(uint8* dst_argb, +int ARGBGray(uint8_t* dst_argb, int dst_stride_argb, - int x, - int y, + int dst_x, + int dst_y, int width, int height); // Make a rectangle of ARGB Sepia tone. LIBYUV_API -int ARGBSepia(uint8* dst_argb, +int ARGBSepia(uint8_t* dst_argb, int dst_stride_argb, - int x, - int y, + int dst_x, + int dst_y, int width, int height); @@ -395,11 +413,11 @@ int ARGBSepia(uint8* dst_argb, // The next 4 coefficients apply to B, G, R, A and produce R of the output. // The last 4 coefficients apply to B, G, R, A and produce A of the output. LIBYUV_API -int ARGBColorMatrix(const uint8* src_argb, +int ARGBColorMatrix(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - const int8* matrix_argb, + const int8_t* matrix_argb, int width, int height); @@ -410,33 +428,33 @@ int ARGBColorMatrix(const uint8* src_argb, // The next 4 coefficients apply to B, G, R, A and produce G of the output. // The last 4 coefficients apply to B, G, R, A and produce R of the output. LIBYUV_API -int RGBColorMatrix(uint8* dst_argb, +int RGBColorMatrix(uint8_t* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, - int x, - int y, + const int8_t* matrix_rgb, + int dst_x, + int dst_y, int width, int height); // Apply a color table each ARGB pixel. // Table contains 256 ARGB values. LIBYUV_API -int ARGBColorTable(uint8* dst_argb, +int ARGBColorTable(uint8_t* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, - int y, + const uint8_t* table_argb, + int dst_x, + int dst_y, int width, int height); // Apply a color table each ARGB pixel but preserve destination alpha. // Table contains 256 ARGB values. LIBYUV_API -int RGBColorTable(uint8* dst_argb, +int RGBColorTable(uint8_t* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, - int y, + const uint8_t* table_argb, + int dst_x, + int dst_y, int width, int height); @@ -444,11 +462,11 @@ int RGBColorTable(uint8* dst_argb, // Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from // RGB (YJ style) and C is an 8 bit color component (R, G or B). LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, +int ARGBLumaColorTable(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - const uint8* luma_rgb_table, + const uint8_t* luma, int width, int height); @@ -461,9 +479,9 @@ int ARGBLumaColorTable(const uint8* src_argb, // A polynomial approximation can be dirived using software such as 'R'. LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, +int ARGBPolynomial(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const float* poly, int width, @@ -472,9 +490,9 @@ int ARGBPolynomial(const uint8* src_argb, // Convert plane of 16 bit shorts to half floats. // Source values are multiplied by scale before storing as half float. LIBYUV_API -int HalfFloatPlane(const uint16* src_y, +int HalfFloatPlane(const uint16_t* src_y, int src_stride_y, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, float scale, int width, @@ -485,55 +503,55 @@ int HalfFloatPlane(const uint16* src_y, // interval_size should be a value between 1 and 255. // interval_offset should be a value between 0 and 255. LIBYUV_API -int ARGBQuantize(uint8* dst_argb, +int ARGBQuantize(uint8_t* dst_argb, int dst_stride_argb, int scale, int interval_size, int interval_offset, - int x, - int y, + int dst_x, + int dst_y, int width, int height); // Copy ARGB to ARGB. 
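For HalfFloatPlane above, the scale factor is applied before the half-float store, so normalizing 10-bit video (0..1023) into 0.0..1.0 is just a matter of picking the constant; a minimal sketch with the uint16_t buffers assumed in scope:

  // 10-bit shorts -> IEEE half floats in [0.0, 1.0].
  libyuv::HalfFloatPlane(src_y, src_stride_y, dst_y, dst_stride_y,
                         1.0f / 1023.0f, width, height);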
LIBYUV_API -int ARGBCopy(const uint8* src_argb, +int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Copy Alpha channel of ARGB to alpha of ARGB. LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, +int ARGBCopyAlpha(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Extract the alpha channel from ARGB. LIBYUV_API -int ARGBExtractAlpha(const uint8* src_argb, +int ARGBExtractAlpha(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, int width, int height); // Copy Y channel to Alpha of ARGB. LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, +int ARGBCopyYToAlpha(const uint8_t* src_y, int src_stride_y, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); -typedef void (*ARGBBlendRow)(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +typedef void (*ARGBBlendRow)(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width); // Get function to Alpha Blend ARGB pixels and store to destination. @@ -544,11 +562,11 @@ ARGBBlendRow GetARGBBlend(); // Source is pre-multiplied by alpha using ARGBAttenuate. // Alpha of destination is set to 255. LIBYUV_API -int ARGBBlend(const uint8* src_argb0, +int ARGBBlend(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); @@ -556,13 +574,13 @@ int ARGBBlend(const uint8* src_argb0, // Alpha Blend plane and store to destination. // Source is not pre-multiplied by alpha. LIBYUV_API -int BlendPlane(const uint8* src_y0, +int BlendPlane(const uint8_t* src_y0, int src_stride_y0, - const uint8* src_y1, + const uint8_t* src_y1, int src_stride_y1, - const uint8* alpha, + const uint8_t* alpha, int alpha_stride, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); @@ -571,102 +589,102 @@ int BlendPlane(const uint8* src_y0, // Source is not pre-multiplied by alpha. // Alpha is full width x height and subsampled to half size to apply to UV. LIBYUV_API -int I420Blend(const uint8* src_y0, +int I420Blend(const uint8_t* src_y0, int src_stride_y0, - const uint8* src_u0, + const uint8_t* src_u0, int src_stride_u0, - const uint8* src_v0, + const uint8_t* src_v0, int src_stride_v0, - const uint8* src_y1, + const uint8_t* src_y1, int src_stride_y1, - const uint8* src_u1, + const uint8_t* src_u1, int src_stride_u1, - const uint8* src_v1, + const uint8_t* src_v1, int src_stride_v1, - const uint8* alpha, + const uint8_t* alpha, int alpha_stride, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height); // Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. LIBYUV_API -int ARGBMultiply(const uint8* src_argb0, +int ARGBMultiply(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Add ARGB image with ARGB image. Saturates to 255. 
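A sketch of the blend contract above, on the assumption that the inputs are not yet premultiplied and so must be run through ARGBAttenuate first (ARGBAttenuate is declared further down in this header; pre0/pre1 are caller-provided scratch buffers):

  // Premultiply both inputs, then blend; dst alpha is set to 255.
  libyuv::ARGBAttenuate(src0, stride0, pre0, stride0, width, height);
  libyuv::ARGBAttenuate(src1, stride1, pre1, stride1, width, height);
  libyuv::ARGBBlend(pre0, stride0, pre1, stride1,
                    dst, dst_stride, width, height);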
LIBYUV_API -int ARGBAdd(const uint8* src_argb0, +int ARGBAdd(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0. LIBYUV_API -int ARGBSubtract(const uint8* src_argb0, +int ARGBSubtract(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I422 to YUY2. LIBYUV_API -int I422ToYUY2(const uint8* src_y, +int I422ToYUY2(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_yuy2, + int dst_stride_yuy2, int width, int height); // Convert I422 to UYVY. LIBYUV_API -int I422ToUYVY(const uint8* src_y, +int I422ToUYVY(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_frame, - int dst_stride_frame, + uint8_t* dst_uyvy, + int dst_stride_uyvy, int width, int height); // Convert unattentuated ARGB to preattenuated ARGB. LIBYUV_API -int ARGBAttenuate(const uint8* src_argb, +int ARGBAttenuate(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert preattentuated ARGB to unattenuated ARGB. LIBYUV_API -int ARGBUnattenuate(const uint8* src_argb, +int ARGBUnattenuate(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); @@ -675,9 +693,9 @@ int ARGBUnattenuate(const uint8* src_argb, // Computes table of cumulative sum for image where the value is the sum // of all values above and to the left of the entry. Used by ARGBBlur. LIBYUV_API -int ARGBComputeCumulativeSum(const uint8* src_argb, +int ARGBComputeCumulativeSum(const uint8_t* src_argb, int src_stride_argb, - int32* dst_cumsum, + int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height); @@ -689,11 +707,11 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, // radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. // Blur is optimized for radius of 5 (11x11) or less. LIBYUV_API -int ARGBBlur(const uint8* src_argb, +int ARGBBlur(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - int32* dst_cumsum, + int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height, @@ -701,24 +719,24 @@ int ARGBBlur(const uint8* src_argb, // Multiply ARGB image by ARGB value. LIBYUV_API -int ARGBShade(const uint8* src_argb, +int ARGBShade(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, - uint32 value); + uint32_t value); // Interpolate between two images using specified amount of interpolation // (0 to 255) and store to destination. // 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 // and 255 means 1% src0 and 99% src1. 
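A sketch of the fraction described immediately above (InterpolatePlane itself is declared just below): 128 gives an essentially equal-weight blend of the two source planes.

  // Halfway cross-fade between two same-size planes.
  libyuv::InterpolatePlane(src0, stride0, src1, stride1,
                           dst, dst_stride, width, height, 128);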
LIBYUV_API -int InterpolatePlane(const uint8* src0, +int InterpolatePlane(const uint8_t* src0, int src_stride0, - const uint8* src1, + const uint8_t* src1, int src_stride1, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height, @@ -727,11 +745,11 @@ int InterpolatePlane(const uint8* src0, // Interpolate between two ARGB images using specified amount of interpolation // Internally calls InterpolatePlane with width * 4 (bpp). LIBYUV_API -int ARGBInterpolate(const uint8* src_argb0, +int ARGBInterpolate(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, @@ -741,93 +759,78 @@ int ARGBInterpolate(const uint8* src_argb0, // Internally calls InterpolatePlane on each plane where the U and V planes // are half width and half height. LIBYUV_API -int I420Interpolate(const uint8* src0_y, +int I420Interpolate(const uint8_t* src0_y, int src0_stride_y, - const uint8* src0_u, + const uint8_t* src0_u, int src0_stride_u, - const uint8* src0_v, + const uint8_t* src0_v, int src0_stride_v, - const uint8* src1_y, + const uint8_t* src1_y, int src1_stride_y, - const uint8* src1_u, + const uint8_t* src1_u, int src1_stride_u, - const uint8* src1_v, + const uint8_t* src1_v, int src1_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height, int interpolation); -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) -#define LIBYUV_DISABLE_X86 -#endif -// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#define LIBYUV_DISABLE_X86 -#endif -#endif -// The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#define HAS_ARGBAFFINEROW_SSE2 -#endif - // Row function for copying pixels from a source with a slope to a row // of destination. Useful for scaling, rotation, mirror, texture mapping. LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, +void ARGBAffineRow_C(const uint8_t* src_argb, int src_argb_stride, - uint8* dst_argb, + uint8_t* dst_argb, const float* uv_dudv, int width); +// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, +void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, - uint8* dst_argb, + uint8_t* dst_argb, const float* uv_dudv, int width); // Shuffle ARGB channel order. e.g. BGRA to ARGB. // shuffler is 16 bytes and must be aligned. LIBYUV_API -int ARGBShuffle(const uint8* src_bgra, +int ARGBShuffle(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - const uint8* shuffler, + const uint8_t* shuffler, int width, int height); // Sobel ARGB effect with planar output. LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, +int ARGBSobelToPlane(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height); // Sobel ARGB effect. 
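For ARGBShuffle above, the 16-byte shuffler is read with pshufb-style semantics (destination byte i takes source byte shuffler[i]), an assumption consistent with libyuv's internal shuffle masks. The mask below reorders BGRA memory order into ARGB memory order; the aligned-attribute syntax is GCC/clang specific:

  static const uint8_t kShuffleBGRAToARGB[16]
      __attribute__((aligned(16))) = {3u,  2u,  1u, 0u, 7u,  6u,  5u,  4u,
                                      11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
  libyuv::ARGBShuffle(src_bgra, src_stride, dst_argb, dst_stride,
                      kShuffleBGRAToARGB, width, height);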
LIBYUV_API -int ARGBSobel(const uint8* src_argb, +int ARGBSobel(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB. LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, +int ARGBSobelXY(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height); diff --git a/chromium/third_party/libyuv/include/libyuv/rotate.h b/chromium/third_party/libyuv/include/libyuv/rotate.h index b9f7154a51d..76b692be8b0 100644 --- a/chromium/third_party/libyuv/include/libyuv/rotate.h +++ b/chromium/third_party/libyuv/include/libyuv/rotate.h @@ -33,79 +33,79 @@ typedef enum RotationMode { // Rotate I420 frame. LIBYUV_API -int I420Rotate(const uint8* src_y, +int I420Rotate(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, - int src_width, - int src_height, + int width, + int height, enum RotationMode mode); // Rotate NV12 input and store in I420. LIBYUV_API -int NV12ToI420Rotate(const uint8* src_y, +int NV12ToI420Rotate(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, - int src_width, - int src_height, + int width, + int height, enum RotationMode mode); // Rotate a plane by 0, 90, 180, or 270. LIBYUV_API -int RotatePlane(const uint8* src, +int RotatePlane(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, - int src_width, - int src_height, + int width, + int height, enum RotationMode mode); // Rotate planes by 90, 180, 270. Deprecated. LIBYUV_API -void RotatePlane90(const uint8* src, +void RotatePlane90(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API -void RotatePlane180(const uint8* src, +void RotatePlane180(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API -void RotatePlane270(const uint8* src, +void RotatePlane270(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API -void RotateUV90(const uint8* src, +void RotateUV90(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height); @@ -115,21 +115,21 @@ void RotateUV90(const uint8* src, // split the data into two buffers while // rotating them. Deprecated. LIBYUV_API -void RotateUV180(const uint8* src, +void RotateUV180(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height); LIBYUV_API -void RotateUV270(const uint8* src, +void RotateUV270(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height); @@ -139,19 +139,19 @@ void RotateUV270(const uint8* src, // order will result in a rotation by +- 90 degrees. // Deprecated. 
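For I420Rotate above, note that 90- and 270-degree rotations swap the destination geometry: the output planes are height x width, so their strides follow the source height. A sketch assuming kRotate90 from the RotationMode enum and caller-allocated destination planes of the rotated size:

  // w x h source -> h x w destination after a 90-degree rotation.
  libyuv::I420Rotate(src_y, w, src_u, w / 2, src_v, w / 2,
                     dst_y, h, dst_u, h / 2, dst_v, h / 2,
                     w, h, libyuv::kRotate90);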
LIBYUV_API -void TransposePlane(const uint8* src, +void TransposePlane(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API -void TransposeUV(const uint8* src, +void TransposeUV(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height); diff --git a/chromium/third_party/libyuv/include/libyuv/rotate_argb.h b/chromium/third_party/libyuv/include/libyuv/rotate_argb.h index be0190c1787..20432949ab4 100644 --- a/chromium/third_party/libyuv/include/libyuv/rotate_argb.h +++ b/chromium/third_party/libyuv/include/libyuv/rotate_argb.h @@ -21,9 +21,9 @@ extern "C" { // Rotate ARGB frame LIBYUV_API -int ARGBRotate(const uint8* src_argb, +int ARGBRotate(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int src_width, int src_height, diff --git a/chromium/third_party/libyuv/include/libyuv/rotate_row.h b/chromium/third_party/libyuv/include/libyuv/rotate_row.h index 7e9dfd2cf4e..5edc0fcf13a 100644 --- a/chromium/third_party/libyuv/include/libyuv/rotate_row.h +++ b/chromium/third_party/libyuv/include/libyuv/rotate_row.h @@ -18,10 +18,14 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) @@ -34,21 +38,18 @@ extern "C" { #define HAS_TRANSPOSEUVWX8_SSE2 #endif -// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__i386__) || \ - (defined(__x86_64__) && !defined(__native_client__))) +// The following are available for GCC 32 or 64 bit: +#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__)) #define HAS_TRANSPOSEWX8_SSSE3 #endif -// The following are available for 64 bit GCC but not NaCL: -#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ - defined(__x86_64__) +// The following are available for 64 bit GCC: +#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) #define HAS_TRANSPOSEWX8_FAST_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON @@ -59,129 +60,129 @@ extern "C" { #define HAS_TRANSPOSEUVWX16_MSA #endif -void TransposeWxH_C(const uint8* src, +void TransposeWxH_C(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height); -void TransposeWx8_C(const uint8* src, +void TransposeWx8_C(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx16_C(const uint8* src, +void TransposeWx16_C(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx8_NEON(const uint8* src, +void TransposeWx8_NEON(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx8_SSSE3(const uint8* src, +void 
TransposeWx8_SSSE3(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx8_Fast_SSSE3(const uint8* src, +void TransposeWx8_Fast_SSSE3(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx16_MSA(const uint8* src, +void TransposeWx16_MSA(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx8_Any_NEON(const uint8* src, +void TransposeWx8_Any_NEON(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx8_Any_SSSE3(const uint8* src, +void TransposeWx8_Any_SSSE3(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx8_Fast_Any_SSSE3(const uint8* src, +void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeWx16_Any_MSA(const uint8* src, +void TransposeWx16_Any_MSA(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width); -void TransposeUVWxH_C(const uint8* src, +void TransposeUVWxH_C(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height); -void TransposeUVWx8_C(const uint8* src, +void TransposeUVWx8_C(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx16_C(const uint8* src, +void TransposeUVWx16_C(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_SSE2(const uint8* src, +void TransposeUVWx8_SSE2(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_NEON(const uint8* src, +void TransposeUVWx8_NEON(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx16_MSA(const uint8* src, +void TransposeUVWx16_MSA(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_SSE2(const uint8* src, +void TransposeUVWx8_Any_SSE2(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_NEON(const uint8* src, +void TransposeUVWx8_Any_NEON(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx16_Any_MSA(const uint8* src, +void TransposeUVWx16_Any_MSA(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width); diff --git a/chromium/third_party/libyuv/include/libyuv/row.h b/chromium/third_party/libyuv/include/libyuv/row.h index 992d2ceb5d5..62ed119db7b 100644 --- a/chromium/third_party/libyuv/include/libyuv/row.h +++ b/chromium/third_party/libyuv/include/libyuv/row.h @@ -20,29 +20,20 @@ namespace libyuv { extern "C" { #endif -#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) - -#define 
align_buffer_64(var, size) \ - uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ - uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ - -#define free_aligned_buffer_64(var) \ - free(var##_mem); \ - var = 0 - -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) -// define LIBYUV_DISABLE_X86 +#define LIBYUV_DISABLE_X86 #endif #endif -#if defined(__native_client__) -#define LIBYUV_DISABLE_NEON -#endif // clang >= 3.5.0 required for Arm64. #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON) #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5)) @@ -178,7 +169,6 @@ extern "C" { // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. -// The code supports NaCL but requires a new compiler and validator. #if !defined(LIBYUV_DISABLE_X86) && \ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ defined(GCC_HAS_AVX2)) @@ -262,11 +252,14 @@ extern "C" { // TODO(fbarchard): Port to Visual C #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) +#define HAS_ABGRTOAR30ROW_SSSE3 #define HAS_ARGBTOAR30ROW_SSSE3 #define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT8TO16ROW_SSE2 // I210 is for H010. 2 = 422. I for 601 vs H for 709. +#define HAS_I210TOAR30ROW_SSSE3 #define HAS_I210TOARGBROW_SSSE3 +#define HAS_I422TOAR30ROW_SSSE3 #define HAS_MERGERGBROW_SSSE3 #define HAS_SPLITRGBROW_SSSE3 #endif @@ -276,9 +269,15 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#define HAS_ABGRTOAR30ROW_AVX2 #define HAS_ARGBTOAR30ROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 +#define HAS_I210TOARGBROW_AVX2 +#define HAS_I210TOAR30ROW_AVX2 +#define HAS_I422TOAR30ROW_AVX2 +#define HAS_I422TOUYVYROW_AVX2 +#define HAS_I422TOYUY2ROW_AVX2 #define HAS_MERGEUVROW_16_AVX2 #define HAS_MULTIPLYROW_16_AVX2 #endif @@ -463,18 +462,18 @@ extern "C" { #else #define SIMD_ALIGNED(var) __declspec(align(16)) var #endif -typedef __declspec(align(16)) int16 vec16[8]; -typedef __declspec(align(16)) int32 vec32[4]; -typedef __declspec(align(16)) int8 vec8[16]; -typedef __declspec(align(16)) uint16 uvec16[8]; -typedef __declspec(align(16)) uint32 uvec32[4]; -typedef __declspec(align(16)) uint8 uvec8[16]; -typedef __declspec(align(32)) int16 lvec16[16]; -typedef __declspec(align(32)) int32 lvec32[8]; -typedef __declspec(align(32)) int8 lvec8[32]; -typedef __declspec(align(32)) uint16 ulvec16[16]; -typedef __declspec(align(32)) uint32 ulvec32[8]; -typedef __declspec(align(32)) uint8 ulvec8[32]; +typedef __declspec(align(16)) int16_t vec16[8]; +typedef __declspec(align(16)) int32_t vec32[4]; +typedef __declspec(align(16)) int8_t vec8[16]; +typedef __declspec(align(16)) uint16_t uvec16[8]; +typedef __declspec(align(16)) uint32_t uvec32[4]; +typedef __declspec(align(16)) uint8_t uvec8[16]; +typedef __declspec(align(32)) int16_t lvec16[16]; +typedef __declspec(align(32)) int32_t lvec32[8]; +typedef 
__declspec(align(32)) int8_t lvec8[32]; +typedef __declspec(align(32)) uint16_t ulvec16[16]; +typedef __declspec(align(32)) uint32_t ulvec32[8]; +typedef __declspec(align(32)) uint8_t ulvec8[32]; #elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__)) // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. #if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2) @@ -482,32 +481,32 @@ typedef __declspec(align(32)) uint8 ulvec8[32]; #else #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #endif -typedef int16 __attribute__((vector_size(16))) vec16; -typedef int32 __attribute__((vector_size(16))) vec32; -typedef int8 __attribute__((vector_size(16))) vec8; -typedef uint16 __attribute__((vector_size(16))) uvec16; -typedef uint32 __attribute__((vector_size(16))) uvec32; -typedef uint8 __attribute__((vector_size(16))) uvec8; -typedef int16 __attribute__((vector_size(32))) lvec16; -typedef int32 __attribute__((vector_size(32))) lvec32; -typedef int8 __attribute__((vector_size(32))) lvec8; -typedef uint16 __attribute__((vector_size(32))) ulvec16; -typedef uint32 __attribute__((vector_size(32))) ulvec32; -typedef uint8 __attribute__((vector_size(32))) ulvec8; +typedef int16_t __attribute__((vector_size(16))) vec16; +typedef int32_t __attribute__((vector_size(16))) vec32; +typedef int8_t __attribute__((vector_size(16))) vec8; +typedef uint16_t __attribute__((vector_size(16))) uvec16; +typedef uint32_t __attribute__((vector_size(16))) uvec32; +typedef uint8_t __attribute__((vector_size(16))) uvec8; +typedef int16_t __attribute__((vector_size(32))) lvec16; +typedef int32_t __attribute__((vector_size(32))) lvec32; +typedef int8_t __attribute__((vector_size(32))) lvec8; +typedef uint16_t __attribute__((vector_size(32))) ulvec16; +typedef uint32_t __attribute__((vector_size(32))) ulvec32; +typedef uint8_t __attribute__((vector_size(32))) ulvec8; #else #define SIMD_ALIGNED(var) var -typedef int16 vec16[8]; -typedef int32 vec32[4]; -typedef int8 vec8[16]; -typedef uint16 uvec16[8]; -typedef uint32 uvec32[4]; -typedef uint8 uvec8[16]; -typedef int16 lvec16[16]; -typedef int32 lvec32[8]; -typedef int8 lvec8[32]; -typedef uint16 ulvec16[16]; -typedef uint32 ulvec32[8]; -typedef uint8 ulvec8[32]; +typedef int16_t vec16[8]; +typedef int32_t vec32[4]; +typedef int8_t vec8[16]; +typedef uint16_t uvec16[8]; +typedef uint32_t uvec32[4]; +typedef uint8_t uvec8[16]; +typedef int16_t lvec16[16]; +typedef int32_t lvec32[8]; +typedef int8_t lvec8[32]; +typedef uint16_t ulvec16[16]; +typedef uint32_t ulvec32[8]; +typedef uint8_t ulvec8[32]; #endif #if defined(__aarch64__) @@ -531,13 +530,13 @@ struct YuvConstants { #else // This struct is for Intel color conversion. 
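The SIMD_ALIGNED macro and vector typedefs above are how row functions declare constants that the SSE/AVX loads may treat as aligned; a sketch (the constant names are illustrative, and the brace initialization of the GCC vector form is an assumption matching how the typedefs are used elsewhere in this header):

  // 16-byte-aligned constant usable by pshufb-style shuffles.
  static const uvec8 kShuffleReverse = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
                                        7u,  6u,  5u,  4u,  3u,  2u,  1u, 0u};
  // Equivalent with a plain array, wrapping only the declarator:
  static const uint8_t SIMD_ALIGNED(kZeroMask[16]) = {0};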
struct YuvConstants { - int8 kUVToB[32]; - int8 kUVToG[32]; - int8 kUVToR[32]; - int16 kUVBiasB[16]; - int16 kUVBiasG[16]; - int16 kUVBiasR[16]; - int16 kYToRgb[16]; + int8_t kUVToB[32]; + int8_t kUVToG[32]; + int8_t kUVToR[32]; + int16_t kUVBiasB[16]; + int16_t kUVBiasG[16]; + int16_t kUVBiasR[16]; + int16_t kYToRgb[16]; }; // Offsets into YuvConstants structure @@ -560,6 +559,16 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601 extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 +#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) + +#define align_buffer_64(var, size) \ + uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \ + uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ + +#define free_aligned_buffer_64(var) \ + free(var##_mem); \ + var = 0 + #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -572,62 +581,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 #else #define LABELALIGN #endif -#if defined(__native_client__) && defined(__x86_64__) -// r14 is used for MEMOP macros. -#define NACL_R14 "r14", -#define BUNDLELOCK ".bundle_lock\n" -#define BUNDLEUNLOCK ".bundle_unlock\n" -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#define MEMLEA3(offset, index, scale) #offset "(,%q" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%q" #base ",%q" #index "," #scale ")" -#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15" -#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15" -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " (%%r15,%%r14),%%" #reg "\n" BUNDLEUNLOCK -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " %%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " (%%r15,%%r14),%" #arg "\n" BUNDLEUNLOCK -#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" BUNDLEUNLOCK -#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #op \ - " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK -#else // defined(__native_client__) && defined(__x86_64__) -#define NACL_R14 -#define BUNDLEALIGN -#define MEMACCESS(base) "(%" #base ")" -#define MEMACCESS2(offset, base) #offset "(%" #base ")" -#define MEMLEA(offset, base) #offset "(%" #base ")" -#define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%" #base ",%" #index "," #scale ")" -#define MEMMOVESTRING(s, d) -#define MEMSTORESTRING(reg, d) -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - #opcode " %%" #reg "," 
#offset "(%" #base ",%" #index "," #scale ")\n" -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n" -#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 \ - ",%%" #reg2 "\n" -#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ - #op " $" #sel ",%%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n" -#endif // defined(__native_client__) && defined(__x86_64__) // Intel Code Analizer markers. Insert IACA_START IACA_END around code to be // measured and then run with iaca -64 libyuv_unittest. @@ -680,2452 +633,2701 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 IACA_UD_BYTES \ } -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + 
uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_MSA(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_MSA(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_MSA(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void 
NV12ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_MSA(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_MSA(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_MSA(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width); -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_MSA(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_MSA(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToUV444Row_NEON(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVRow_NEON(const uint8* src_argb, +void ARGBToUVRow_NEON(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUV444Row_MSA(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_MSA(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void 
ARGBToUVRow_MSA(const uint8* src_argb, +void ARGBToUVRow_MSA(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_NEON(const uint8* src_argb, +void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void BGRAToUVRow_NEON(const uint8* src_bgra, +void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ABGRToUVRow_NEON(const uint8* src_abgr, +void ABGRToUVRow_NEON(const uint8_t* src_abgr, int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGBAToUVRow_NEON(const uint8* src_rgba, +void RGBAToUVRow_NEON(const uint8_t* src_rgba, int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB24ToUVRow_NEON(const uint8* src_rgb24, +void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RAWToUVRow_NEON(const uint8* src_raw, +void RAWToUVRow_NEON(const uint8_t* src_raw, int src_stride_raw, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB565ToUVRow_NEON(const uint8* src_rgb565, +void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, +void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, +void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_MSA(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void BGRAToUVRow_MSA(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, +void BGRAToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ABGRToUVRow_MSA(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, +void ABGRToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGBAToUVRow_MSA(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, +void RGBAToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB24ToUVRow_MSA(const uint8* src_rgb24, - int src_stride_rgb24, - uint8* dst_u, - uint8* dst_v, +void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RAWToUVRow_MSA(const uint8* src_raw, - int src_stride_raw, - uint8* dst_u, - uint8* dst_v, +void RAWToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB565ToUVRow_MSA(const uint8* src_rgb565, +void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGB1555ToUVRow_MSA(const uint8* src_argb1555, 
+void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width); +void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_y, int width); -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width); -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width); -void BGRAToYRow_MSA(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_MSA(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_MSA(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_MSA(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_MSA(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width); -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, 
int width); -void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, - uint8* dst_y, +void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width); +void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width); +void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width); +void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, - uint8* dst_y, +void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void BGRAToYRow_Any_MSA(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_Any_MSA(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_MSA(const uint8* src_rgba, uint8* dst_y, int width); -void ARGBToYJRow_Any_MSA(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_Any_MSA(const uint8* src_argb, uint8* dst_y, int width); -void RGB24ToYRow_Any_MSA(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_Any_MSA(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_Any_MSA(const uint8* src_rgb565, uint8* 
dst_y, int width); -void ARGB1555ToYRow_Any_MSA(const uint8* src_argb1555, uint8* dst_y, int width); - -void ARGBToUVRow_AVX2(const uint8* src_argb, +void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void ARGBToUVRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_AVX2(const uint8* src_argb, +void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVRow_SSSE3(const uint8* src_argb, +void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_SSSE3(const uint8* src_argb, +void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8* src_bgra, +void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0, int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8* src_abgr, +void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0, int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8* src_rgba, +void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0, int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVRow_Any_AVX2(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_Any_AVX2(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, - int width); -void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, +void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride_ptr, + 
uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, +void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUV444Row_Any_NEON(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVRow_Any_NEON(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUV444Row_Any_MSA(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, - int width); -void ARGBToUVRow_Any_MSA(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_Any_NEON(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void BGRAToUVRow_Any_NEON(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, +void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ABGRToUVRow_Any_NEON(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, +void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGBAToUVRow_Any_NEON(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, +void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, - int src_stride_rgb24, - uint8* dst_u, - uint8* dst_v, +void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RAWToUVRow_Any_NEON(const uint8* src_raw, - int src_stride_raw, - uint8* dst_u, - uint8* dst_v, +void RAWToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, - int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, - int width); -void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555, - int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, +void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444, - int src_stride_argb4444, - uint8* dst_u, - uint8* dst_v, +void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_Any_MSA(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void BGRAToUVRow_Any_MSA(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, +void 
BGRAToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ABGRToUVRow_Any_MSA(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, +void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGBAToUVRow_Any_MSA(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, +void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB24ToUVRow_Any_MSA(const uint8* src_rgb24, - int src_stride_rgb24, - uint8* dst_u, - uint8* dst_v, +void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RAWToUVRow_Any_MSA(const uint8* src_raw, - int src_stride_raw, - uint8* dst_u, - uint8* dst_v, +void RAWToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB565ToUVRow_Any_MSA(const uint8* src_rgb565, - int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, +void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGB1555ToUVRow_Any_MSA(const uint8* src_argb1555, - int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, +void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVRow_C(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_C(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVJRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVRow_C(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUVJRow_C(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUVJRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void BGRAToUVRow_C(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, +void BGRAToUVRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ABGRToUVRow_C(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, +void ABGRToUVRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGBAToUVRow_C(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, +void RGBAToUVRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB24ToUVRow_C(const uint8* src_rgb24, - int src_stride_rgb24, - uint8* dst_u, - uint8* dst_v, +void RGB24ToUVRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RAWToUVRow_C(const uint8* src_raw, - int src_stride_raw, - uint8* dst_u, - uint8* dst_v, +void RAWToUVRow_C(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void RGB565ToUVRow_C(const uint8* src_rgb565, +void RGB565ToUVRow_C(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, 
+ uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGB1555ToUVRow_C(const uint8* src_argb1555, +void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGB4444ToUVRow_C(const uint8* src_argb4444, +void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, int src_stride_argb4444, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUV444Row_SSSE3(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_C(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_MSA(const uint8* src, uint8* dst, int width); -void MirrorRow_C(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width); - -void MirrorUVRow_SSSE3(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + +void MirrorUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void MirrorUVRow_NEON(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void MirrorUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void MirrorUVRow_MSA(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void MirrorUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); - -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_MSA(const 
uint8* src, uint8* dst, int width); - -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_SSE2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void MirrorUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); + +void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + +void SplitUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_SSE2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void SplitUVRow_AVX2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_AVX2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void SplitUVRow_NEON(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void SplitUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_Any_SSE2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void SplitUVRow_Any_AVX2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void SplitUVRow_Any_NEON(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void SplitUVRow_Any_MSA(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void MergeUVRow_C(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_C(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width); -void MergeUVRow_SSE2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_SSE2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width); -void MergeUVRow_AVX2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_AVX2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width); -void MergeUVRow_NEON(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_NEON(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width); -void MergeUVRow_MSA(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_MSA(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width); -void MergeUVRow_Any_SSE2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void MergeUVRow_Any_AVX2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, 
+void MergeUVRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void MergeUVRow_Any_NEON(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void MergeUVRow_Any_MSA(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SplitRGBRow_C(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_C(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width); -void SplitRGBRow_SSSE3(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_SSSE3(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width); -void SplitRGBRow_NEON(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_NEON(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width); -void SplitRGBRow_Any_SSSE3(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width); -void SplitRGBRow_Any_NEON(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width); -void MergeRGBRow_C(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_C(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width); -void MergeRGBRow_SSSE3(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_SSSE3(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width); -void MergeRGBRow_NEON(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width); -void MergeRGBRow_Any_SSSE3(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, int width); -void MergeRGBRow_Any_NEON(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_Any_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width); -void MergeUVRow_16_C(const uint16* src_u, - const uint16* src_v, - uint16* dst_uv, +void MergeUVRow_16_C(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, int scale, /* 64 for 10 bit */ int width); -void MergeUVRow_16_AVX2(const uint16* src_u, - const uint16* src_v, - uint16* dst_uv, +void MergeUVRow_16_AVX2(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, int scale, int width); -void MultiplyRow_16_AVX2(const uint16* src_y, - uint16* dst_y, +void MultiplyRow_16_AVX2(const uint16_t* src_y, + uint16_t* dst_y, int scale, int width); -void MultiplyRow_16_C(const uint16* src_y, uint16* dst_y, int scale, int width); +void MultiplyRow_16_C(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); -void Convert8To16Row_C(const uint8* src_y, uint16* dst_y, int scale, int width); -void Convert8To16Row_SSE2(const 
uint8* src_y, - uint16* dst_y, +void Convert8To16Row_C(const uint8_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void Convert8To16Row_SSE2(const uint8_t* src_y, + uint16_t* dst_y, int scale, int width); -void Convert8To16Row_AVX2(const uint8* src_y, - uint16* dst_y, +void Convert8To16Row_AVX2(const uint8_t* src_y, + uint16_t* dst_y, int scale, int width); -void Convert8To16Row_Any_SSE2(const uint8* src_y, - uint16* dst_y, +void Convert8To16Row_Any_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, int scale, int width); -void Convert8To16Row_Any_AVX2(const uint8* src_y, - uint16* dst_y, +void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, int scale, int width); -void Convert16To8Row_C(const uint16* src_y, uint8* dst_y, int scale, int width); -void Convert16To8Row_SSSE3(const uint16* src_y, - uint8* dst_y, +void Convert16To8Row_C(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width); +void Convert16To8Row_SSSE3(const uint16_t* src_y, + uint8_t* dst_y, int scale, int width); -void Convert16To8Row_AVX2(const uint16* src_y, - uint8* dst_y, +void Convert16To8Row_AVX2(const uint16_t* src_y, + uint8_t* dst_y, int scale, int width); -void Convert16To8Row_Any_SSSE3(const uint16* src_y, - uint8* dst_y, +void Convert16To8Row_Any_SSSE3(const uint16_t* src_ptr, + uint8_t* dst_ptr, int scale, int width); -void Convert16To8Row_Any_AVX2(const uint16* src_y, - uint8* dst_y, +void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr, + uint8_t* dst_ptr, int scale, int width); -void CopyRow_SSE2(const uint8* src, uint8* dst, int count); -void CopyRow_AVX(const uint8* src, uint8* dst, int count); -void CopyRow_ERMS(const uint8* src, uint8* dst, int count); -void CopyRow_NEON(const uint8* src, uint8* dst, int count); -void CopyRow_MIPS(const uint8* src, uint8* dst, int count); -void CopyRow_C(const uint8* src, uint8* dst, int count); -void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count); -void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count); -void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count); - -void CopyRow_16_C(const uint16* src, uint16* dst, int count); - -void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, - uint8* dst_argb, +void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count); +void CopyRow_C(const uint8_t* src, uint8_t* dst, int count); +void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + +void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count); + +void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, - uint8* dst_argb, +void 
ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_MSA(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, - uint8* dst_a, +void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width); +void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBExtractAlphaRow_Any_AVX2(const uint8* src_argb, - uint8* dst_a, +void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, - uint8* dst_a, +void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBExtractAlphaRow_Any_MSA(const uint8* src_argb, - uint8* dst_a, +void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, - uint8* dst_argb, +void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, - uint8* dst_argb, +void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void SetRow_C(uint8* dst, uint8 v8, int count); -void SetRow_MSA(uint8* dst, uint8 v8, int count); -void SetRow_X86(uint8* dst, uint8 v8, int count); -void SetRow_ERMS(uint8* dst, uint8 v8, int count); -void SetRow_NEON(uint8* dst, uint8 v8, int count); -void SetRow_Any_X86(uint8* dst, uint8 v8, int count); -void SetRow_Any_NEON(uint8* dst, uint8 v8, int count); - -void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_Any_MSA(uint8* dst_argb, uint32 v32, int count); +void SetRow_C(uint8_t* dst, uint8_t v8, int width); +void SetRow_MSA(uint8_t* dst, uint8_t v8, int width); +void SetRow_X86(uint8_t* dst, uint8_t v8, int width); +void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width); +void SetRow_NEON(uint8_t* dst, uint8_t v8, int width); +void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width); +void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width); + +void ARGBSetRow_C(uint8_t* dst_argb, 
uint32_t v32, int width); +void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width); +void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width); +void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width); +void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width); +void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width); // ARGBShufflers for BGRAToARGB etc. -void ARGBShuffleRow_C(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width); -void ARGBShuffleRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width); -void ARGBShuffleRow_AVX2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width); -void ARGBShuffleRow_NEON(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width); -void ARGBShuffleRow_MSA(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width); -void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, int width); -void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, int width); -void ARGBShuffleRow_Any_NEON(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, int width); -void ARGBShuffleRow_Any_MSA(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width); - -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, - uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, - uint8* dst_argb, - int width); -void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, - uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, - uint8* dst_argb, - int width); - -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width); -void RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width); -void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width); -void RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_NEON(const 
uint8* src_argb1555, - uint8* dst_argb, - int width); -void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555, - uint8* dst_argb, +void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); + +void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width); +void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); + +void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width); +void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width); -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width); -void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width); -void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width); -void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, - uint8* dst_argb, +void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); +void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); +void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width); +void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width); +void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void RAWToARGBRow_Any_SSSE3(const uint8* 
src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width); +void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); -void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, - uint8* dst_argb, +void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, - uint8* dst_argb, +void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, - uint8* dst_argb, +void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void RGB24ToARGBRow_Any_MSA(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToARGBRow_Any_MSA(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width); -void RAWToRGB24Row_Any_MSA(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, - uint8* dst_argb, +void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void RGB565ToARGBRow_Any_MSA(const uint8* src_rgb565, - uint8* dst_argb, +void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB1555ToARGBRow_Any_MSA(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* 
dst_rgb, int width); -void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToAR30Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); - -void ARGBToRGB565DitherRow_C(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); + +void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width); -void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, int width); -void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, int width); -void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToAR30Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); - -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width); + +void ARGBToRGB24Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb24, + int width); +void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width); +void ARGBToRGB565Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb565, + int width); +void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb1555, + int width); +void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb4444, + int width); +void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width); -void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565DitherRow_MSA(const uint8* src_argb, - uint8* dst_rgb, - const uint32 
dither4, +void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width); -void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width); - -void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_Any_MSA(const uint8* src_y, uint8* dst_argb, int width); - -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width); +void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width); + +void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + +void I444ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void 
I422ToAR30Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I210ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void I210ToARGBRow_C(const uint16* src_y, - const uint16* src_u, - const uint16* src_v, - uint8* dst_argb, +void I210ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV21ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_C(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_C(const uint8_t* src_yuy2, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_C(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_C(const uint8_t* src_uyvy, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB1555Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - 
const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I210ToARGBRow_SSSE3(const uint16* src_y, - const uint16* src_u, - const uint16* src_v, - uint8* dst_argb, +void I422ToAR30Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422ToAR30Row_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_AVX2(const uint8_t* 
y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV21ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV21ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_AVX2(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB4444Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB4444Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB1555Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_AVX2(const uint8* src_y, - const uint8* 
src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB1555Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToAR30Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I210ToARGBRow_Any_SSSE3(const uint16* src_y, - const uint16* src_u, - const uint16* src_v, - uint8* dst_argb, +void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - 
uint8* dst_argb, +void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_Any_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + 
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_Any_MSA(const uint8* src_y, uint8* dst_argb, int width); +void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width); +void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width); +void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width); +void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); +void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); +void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void 
I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);

 // ARGB preattenuated alpha blend.
-void ARGBBlendRow_SSSE3(const uint8* src_argb,
-                        const uint8* src_argb1,
-                        uint8* dst_argb,
+void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+                        const uint8_t* src_argb1,
+                        uint8_t* dst_argb,
                         int width);
-void ARGBBlendRow_NEON(const uint8* src_argb,
-                       const uint8* src_argb1,
-                       uint8* dst_argb,
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+                       const uint8_t* src_argb1,
+                       uint8_t* dst_argb,
                        int width);
-void ARGBBlendRow_MSA(const uint8* src_argb,
-                      const uint8* src_argb1,
-                      uint8* dst_argb,
+void ARGBBlendRow_MSA(const uint8_t* src_argb0,
+                      const uint8_t* src_argb1,
+                      uint8_t* dst_argb,
                       int width);
-void ARGBBlendRow_C(const uint8* src_argb,
-                    const uint8* src_argb1,
-                    uint8* dst_argb,
+void ARGBBlendRow_C(const uint8_t* src_argb0,
+                    const uint8_t* src_argb1,
+                    uint8_t* dst_argb,
                     int width);

 // Unattenuated planar alpha blend.
-void BlendPlaneRow_SSSE3(const uint8* src0,
-                         const uint8* src1,
-                         const uint8* alpha,
-                         uint8* dst,
+void BlendPlaneRow_SSSE3(const uint8_t* src0,
+                         const uint8_t* src1,
+                         const uint8_t* alpha,
+                         uint8_t* dst,
                          int width);
-void BlendPlaneRow_Any_SSSE3(const uint8* src0,
-                             const uint8* src1,
-                             const uint8* alpha,
-                             uint8* dst,
+void BlendPlaneRow_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             uint8_t* dst_ptr,
                              int width);
-void BlendPlaneRow_AVX2(const uint8* src0,
-                        const uint8* src1,
-                        const uint8* alpha,
-                        uint8* dst,
+void BlendPlaneRow_AVX2(const uint8_t* src0,
+                        const uint8_t* src1,
+                        const uint8_t* alpha,
+                        uint8_t* dst,
                         int width);
-void BlendPlaneRow_Any_AVX2(const uint8* src0,
-                            const uint8* src1,
-                            const uint8* alpha,
-                            uint8* dst,
-                            int width);
-void BlendPlaneRow_C(const uint8* src0,
-                     const uint8* src1,
-                     const uint8* alpha,
-                     uint8* dst,
+void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* u_buf,
+                            const uint8_t* v_buf,
+                            uint8_t* dst_ptr,
+                            int width);
+void BlendPlaneRow_C(const uint8_t* src0,
+                     const uint8_t* src1,
+                     const uint8_t* alpha,
+                     uint8_t* dst,
                      int width);

 // ARGB multiply images. Same API as Blend, but these require
 // pointer and width alignment for SSE2.
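[Editor's note on the blend contracts above: ARGBBlendRow_* takes a foreground row with preattenuated (premultiplied) alpha, while BlendPlaneRow_* blends two single planes through a separate alpha plane. As a minimal C sketch of the per-pixel shape of each operation; the helper names are illustrative, and the exact rounding inside libyuv's kernels may differ:]

    #include <stdint.h>

    /* Premultiplied-alpha blend of one foreground channel over one
       background channel: dst = fg + bg * (256 - fg_alpha) / 256. */
    static uint8_t blend_premul(uint8_t fg, uint8_t bg, uint8_t fg_a) {
      return (uint8_t)(fg + ((bg * (256 - fg_a)) >> 8));
    }

    /* Planar blend of src0 over src1 by an 8-bit alpha plane:
       dst = (src0 * a + src1 * (255 - a)) / 255, per sample. */
    static uint8_t blend_plane(uint8_t s0, uint8_t s1, uint8_t a) {
      return (uint8_t)((s0 * a + s1 * (255 - a) + 127) / 255);
    }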
-void ARGBMultiplyRow_C(const uint8* src_argb,
-                       const uint8* src_argb1,
-                       uint8* dst_argb,
+void ARGBMultiplyRow_C(const uint8_t* src_argb0,
+                       const uint8_t* src_argb1,
+                       uint8_t* dst_argb,
                        int width);
-void ARGBMultiplyRow_SSE2(const uint8* src_argb,
-                          const uint8* src_argb1,
-                          uint8* dst_argb,
+void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+                          const uint8_t* src_argb1,
+                          uint8_t* dst_argb,
                           int width);
-void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb,
-                              const uint8* src_argb1,
-                              uint8* dst_argb,
+void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf,
+                              const uint8_t* uv_buf,
+                              uint8_t* dst_ptr,
                               int width);
-void ARGBMultiplyRow_AVX2(const uint8* src_argb,
-                          const uint8* src_argb1,
-                          uint8* dst_argb,
+void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+                          const uint8_t* src_argb1,
+                          uint8_t* dst_argb,
                           int width);
-void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb,
-                              const uint8* src_argb1,
-                              uint8* dst_argb,
+void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf,
+                              const uint8_t* uv_buf,
+                              uint8_t* dst_ptr,
                               int width);
-void ARGBMultiplyRow_NEON(const uint8* src_argb,
-                          const uint8* src_argb1,
-                          uint8* dst_argb,
+void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
+                          const uint8_t* src_argb1,
+                          uint8_t* dst_argb,
                           int width);
-void ARGBMultiplyRow_Any_NEON(const uint8* src_argb,
-                              const uint8* src_argb1,
-                              uint8* dst_argb,
+void ARGBMultiplyRow_Any_NEON(const uint8_t* y_buf,
+                              const uint8_t* uv_buf,
+                              uint8_t* dst_ptr,
                               int width);
-void ARGBMultiplyRow_MSA(const uint8* src_argb,
-                         const uint8* src_argb1,
-                         uint8* dst_argb,
+void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
+                         const uint8_t* src_argb1,
+                         uint8_t* dst_argb,
                          int width);
-void ARGBMultiplyRow_Any_MSA(const uint8* src_argb,
-                             const uint8* src_argb1,
-                             uint8* dst_argb,
+void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
+                             const uint8_t* uv_buf,
+                             uint8_t* dst_ptr,
                              int width);

 // ARGB add images.
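[Editor's note: the _Any_ variants above are renamed to deliberately generic parameter names (y_buf/uv_buf/dst_ptr) because they are stamped out from shared wrapper macros (libyuv's source/row_any.cc) that adapt a fixed-step SIMD kernel to arbitrary widths. A hedged sketch of that wrapper pattern; the function and step size below are illustrative, not libyuv's actual ANY macros:]

    #include <stdint.h>
    #include <string.h>

    typedef void (*ARGBRowFn)(const uint8_t* src0, const uint8_t* src1,
                              uint8_t* dst, int width);

    /* Run a kernel that only handles multiples of 8 pixels over the bulk
       of the row, then route the ragged tail through small temporaries so
       the kernel never reads or writes past the caller's buffers. */
    static void ARGBRow_Any(ARGBRowFn kernel, const uint8_t* src0,
                            const uint8_t* src1, uint8_t* dst, int width) {
      uint8_t t0[8 * 4] = {0}, t1[8 * 4] = {0}, td[8 * 4]; /* 4 bytes/pixel */
      int n = width & ~7;              /* widest multiple of the kernel step */
      if (n > 0) {
        kernel(src0, src1, dst, n);
      }
      if (width > n) {
        int r = width - n;             /* leftover pixels */
        memcpy(t0, src0 + n * 4, (size_t)r * 4);
        memcpy(t1, src1 + n * 4, (size_t)r * 4);
        kernel(t0, t1, td, 8);         /* one padded step on the copies */
        memcpy(dst + n * 4, td, (size_t)r * 4);
      }
    }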
-void ARGBAddRow_C(const uint8* src_argb,
-                  const uint8* src_argb1,
-                  uint8* dst_argb,
+void ARGBAddRow_C(const uint8_t* src_argb0,
+                  const uint8_t* src_argb1,
+                  uint8_t* dst_argb,
                   int width);
-void ARGBAddRow_SSE2(const uint8* src_argb,
-                     const uint8* src_argb1,
-                     uint8* dst_argb,
+void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+                     const uint8_t* src_argb1,
+                     uint8_t* dst_argb,
                      int width);
-void ARGBAddRow_Any_SSE2(const uint8* src_argb,
-                         const uint8* src_argb1,
-                         uint8* dst_argb,
+void ARGBAddRow_Any_SSE2(const uint8_t* y_buf,
+                         const uint8_t* uv_buf,
+                         uint8_t* dst_ptr,
                          int width);
-void ARGBAddRow_AVX2(const uint8* src_argb,
-                     const uint8* src_argb1,
-                     uint8* dst_argb,
+void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+                     const uint8_t* src_argb1,
+                     uint8_t* dst_argb,
                      int width);
-void ARGBAddRow_Any_AVX2(const uint8* src_argb,
-                         const uint8* src_argb1,
-                         uint8* dst_argb,
+void ARGBAddRow_Any_AVX2(const uint8_t* y_buf,
+                         const uint8_t* uv_buf,
+                         uint8_t* dst_ptr,
                          int width);
-void ARGBAddRow_NEON(const uint8* src_argb,
-                     const uint8* src_argb1,
-                     uint8* dst_argb,
+void ARGBAddRow_NEON(const uint8_t* src_argb0,
+                     const uint8_t* src_argb1,
+                     uint8_t* dst_argb,
                      int width);
-void ARGBAddRow_Any_NEON(const uint8* src_argb,
-                         const uint8* src_argb1,
-                         uint8* dst_argb,
+void ARGBAddRow_Any_NEON(const uint8_t* y_buf,
+                         const uint8_t* uv_buf,
+                         uint8_t* dst_ptr,
                          int width);
-void ARGBAddRow_MSA(const uint8* src_argb,
-                    const uint8* src_argb1,
-                    uint8* dst_argb,
+void ARGBAddRow_MSA(const uint8_t* src_argb0,
+                    const uint8_t* src_argb1,
+                    uint8_t* dst_argb,
                     int width);
-void ARGBAddRow_Any_MSA(const uint8* src_argb,
-                        const uint8* src_argb1,
-                        uint8* dst_argb,
+void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
+                        const uint8_t* uv_buf,
+                        uint8_t* dst_ptr,
                         int width);

 // ARGB subtract images. Same API as Blend, but these require
 // pointer and width alignment for SSE2.
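[Editor's note: the add and subtract rows are per-byte saturating operations applied uniformly across the four ARGB channels. A minimal standalone C sketch of the per-byte math (helper names illustrative):]

    #include <stdint.h>

    /* Saturating add, as ARGBAddRow_C applies to each of B, G, R, A. */
    static uint8_t add_sat_u8(uint8_t a, uint8_t b) {
      int s = a + b;
      return (uint8_t)(s > 255 ? 255 : s);
    }

    /* Saturating subtract (src0 - src1), as in ARGBSubtractRow_C. */
    static uint8_t sub_sat_u8(uint8_t a, uint8_t b) {
      int d = a - b;
      return (uint8_t)(d < 0 ? 0 : d);
    }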
-void ARGBSubtractRow_C(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width); -void ARGBSubtractRow_SSE2(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width); -void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void ARGBSubtractRow_AVX2(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width); -void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void ARGBSubtractRow_NEON(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width); -void ARGBSubtractRow_Any_NEON(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void ARGBSubtractRow_MSA(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width); -void ARGBSubtractRow_Any_MSA(const uint8* src_argb, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, - uint8* dst_rgb, +void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, - uint8* dst_rgb, +void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToAR30Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); +void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); -void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, int width); -void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, int width); -void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, - uint8* 
dst_rgb, +void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, - uint8* dst_rgb, +void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToAR30Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); +void ABGRToAR30Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR30Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); -void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, - uint8* dst_rgb, +void ARGBToRGB24Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, - uint8* dst_rgb, +void ARGBToARGB4444Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, int width); -void ARGBToRGB24Row_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_MSA(const uint8* src_argb, - uint8* dst_rgb, +void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_MSA(const uint8* src_argb, - uint8* dst_rgb, +void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToRGB565DitherRow_Any_MSA(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, int width); -void I444ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, +void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct 
YuvConstants* yuvconstants, int width); -void I422ToRGBARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB4444Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB1555Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const 
uint8* src_a, - uint8* dst_argb, +void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_MSA(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_Any_MSA(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_MSA(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_MSA(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_MSA(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, +void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, +void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void 
YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_NEON(const uint8* src_yuy2, +void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_MSA(const uint8* src_yuy2, - int stride_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_MSA(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_C(const uint8* src_yuy2, - int stride_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_C(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_C(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, - int stride_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, - int stride_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, - int stride_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToYRow_Any_MSA(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_MSA(const uint8* src_yuy2, - int stride_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_MSA(const 
uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToUV422Row_Any_MSA(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_SSE2(const uint8* src_uyvy, +void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_NEON(const uint8* src_uyvy, +void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_NEON(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_NEON(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_MSA(const uint8* src_uyvy, - int stride_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_MSA(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_MSA(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_C(const uint8* src_uyvy, - int stride_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_C(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_C(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, - int stride_uyvy, - uint8* 
dst_u, - uint8* dst_v, +void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, - int stride_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, - int stride_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToYRow_Any_MSA(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_MSA(const uint8* src_uyvy, - int stride_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void UYVYToUV422Row_Any_MSA(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, int width); -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToUYVYRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, int width); -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int width); -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToYUY2Row_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int 
width); -void I422ToYUY2Row_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, - int width); -void I422ToUYVYRow_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, - int width); -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, int width); -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToUYVYRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int width); -void I422ToYUY2Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, - int width); -void I422ToUYVYRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, - int width); -void I422ToYUY2Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, int width); -void I422ToUYVYRow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToUYVYRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int width); -void I422ToYUY2Row_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, int width); -void I422ToUYVYRow_Any_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, int width); // Effects related row functions. 
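[Editor's note: the effects rows that follow transform ARGB pixels in place or row-to-row. Attenuation premultiplies each color channel by the pixel's alpha; unattenuation reverses it, and fixed_invtbl8 (declared below) is a 256-entry fixed-point reciprocal table that turns the per-pixel divide by alpha into a multiply. A rough C sketch of the intent, not libyuv's exact fixed-point rounding:]

    #include <stdint.h>

    /* Attenuate: premultiply color by alpha, dst = c * a / 255. */
    static uint8_t attenuate_u8(uint8_t c, uint8_t a) {
      return (uint8_t)((c * a) / 255);
    }

    /* Unattenuate: divide color by alpha with saturation; this is the
       divide that a fixed_invtbl8-style reciprocal table replaces with
       a multiply in the optimized kernels. */
    static uint8_t unattenuate_u8(uint8_t c, uint8_t a) {
      if (a == 0) return c; /* fully transparent: nothing to undo */
      int v = (c * 255) / a;
      return (uint8_t)(v > 255 ? 255 : v);
    }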
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, - uint8* dst_argb, +void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, - uint8* dst_argb, +void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, - uint8* dst_argb, +void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_MSA(const uint8* src_argb, - uint8* dst_argb, +void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); // Inverse table for unattenuate, shared by C and SSE2. -extern const uint32 fixed_invtbl8[256]; -void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, - uint8* dst_argb, +extern const uint32_t fixed_invtbl8[256]; +void ARGBUnattenuateRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, - uint8* dst_argb, +void ARGBUnattenuateRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); -void ARGBSepiaRow_C(uint8* dst_argb, int width); -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width); -void ARGBSepiaRow_NEON(uint8* dst_argb, int width); -void ARGBSepiaRow_MSA(uint8* dst_argb, int width); +void ARGBSepiaRow_C(uint8_t* dst_argb, int width); +void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); +void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); +void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); -void ARGBColorMatrixRow_C(const uint8* src_argb, - uint8* 
dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width); -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width); -void ARGBColorMatrixRow_NEON(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width); -void ARGBColorMatrixRow_MSA(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width); -void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); +void ARGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); +void ARGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); -void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); +void RGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); +void RGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); -void ARGBQuantizeRow_C(uint8* dst_argb, +void ARGBQuantizeRow_C(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); -void ARGBQuantizeRow_SSE2(uint8* dst_argb, +void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); -void ARGBQuantizeRow_NEON(uint8* dst_argb, +void ARGBQuantizeRow_NEON(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); -void ARGBQuantizeRow_MSA(uint8* dst_argb, +void ARGBQuantizeRow_MSA(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); -void ARGBShadeRow_C(const uint8* src_argb, - uint8* dst_argb, +void ARGBShadeRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value); -void ARGBShadeRow_SSE2(const uint8* src_argb, - uint8* dst_argb, + uint32_t value); +void ARGBShadeRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value); -void ARGBShadeRow_NEON(const uint8* src_argb, - uint8* dst_argb, + uint32_t value); +void ARGBShadeRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value); -void ARGBShadeRow_MSA(const uint8* src_argb, - uint8* dst_argb, + uint32_t value); +void ARGBShadeRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value); + uint32_t value); // Used for blur. 
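ComputeCumulativeSumRow and CumulativeSumToAverageRow below implement the box-blur path (ARGBBlur) via an integral image: each cumsum entry holds the sum of all pixels above and to the left, so any box sum falls out of four corner lookups. A minimal single-channel sketch of the idea (the real rows carry four interleaved channels plus edge handling; names here are hypothetical):

    #include <stdint.h>

    // Build one cumulative-sum row from the pixel row plus the row above.
    static void CumulativeSumRowSketch(const uint8_t* row,
                                       int32_t* cumsum,
                                       const int32_t* previous_cumsum,
                                       int width) {
      int32_t running = 0;
      for (int x = 0; x < width; ++x) {
        running += row[x];
        cumsum[x] = running + previous_cumsum[x];
      }
    }

    // Average of a w-by-h box, using the cumsum rows just above (topleft)
    // and just below (botleft) the box: four lookups per output pixel.
    static uint8_t BoxAverageSketch(const int32_t* topleft,
                                    const int32_t* botleft,
                                    int x, int w, int h) {
      int32_t sum = botleft[x + w] - botleft[x] - topleft[x + w] + topleft[x];
      return (uint8_t)(sum / (w * h));
    }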
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, - const int32* botleft, +void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, + const int32_t* botleft, int width, int area, - uint8* dst, + uint8_t* dst, int count); -void ComputeCumulativeSumRow_SSE2(const uint8* row, - int32* cumsum, - const int32* previous_cumsum, +void ComputeCumulativeSumRow_SSE2(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, int width); -void CumulativeSumToAverageRow_C(const int32* topleft, - const int32* botleft, - int width, +void CumulativeSumToAverageRow_C(const int32_t* tl, + const int32_t* bl, + int w, int area, - uint8* dst, + uint8_t* dst, int count); -void ComputeCumulativeSumRow_C(const uint8* row, - int32* cumsum, - const int32* previous_cumsum, +void ComputeCumulativeSumRow_C(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, int width); LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, +void ARGBAffineRow_C(const uint8_t* src_argb, int src_argb_stride, - uint8* dst_argb, + uint8_t* dst_argb, const float* uv_dudv, int width); LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, +void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, - uint8* dst_argb, - const float* uv_dudv, + uint8_t* dst_argb, + const float* src_dudv, int width); // Used for I420Scale, ARGBScale, and ARGBInterpolate. -void InterpolateRow_C(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, +void InterpolateRow_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, int width, int source_y_fraction); -void InterpolateRow_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, +void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_AVX2(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, +void InterpolateRow_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_NEON(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, +void InterpolateRow_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_MSA(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, +void InterpolateRow_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, int width, int source_y_fraction); -void InterpolateRow_Any_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_AVX2(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_Any_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_MSA(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_16_C(uint16* dst_ptr, - const uint16* src_ptr, - ptrdiff_t src_stride_ptr, +void InterpolateRow_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t 
src_stride, int width, int source_y_fraction); // Sobel images. -void SobelXRow_C(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width); -void SobelXRow_SSE2(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width); -void SobelXRow_NEON(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width); -void SobelXRow_MSA(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width); -void SobelYRow_C(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width); -void SobelYRow_SSE2(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width); -void SobelYRow_NEON(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width); -void SobelYRow_MSA(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width); -void SobelRow_C(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width); -void SobelRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width); -void SobelRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width); -void SobelRow_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width); -void SobelToPlaneRow_C(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width); -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width); -void SobelToPlaneRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width); -void SobelToPlaneRow_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width); -void SobelXYRow_C(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* 
dst_argb, int width); -void SobelXYRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width); -void SobelXYRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width); -void SobelXYRow_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width); -void SobelRow_Any_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelRow_Any_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelRow_Any_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelToPlaneRow_Any_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelXYRow_Any_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelXYRow_Any_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void SobelXYRow_Any_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, +void ARGBPolynomialRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width); -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, +void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width); -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, +void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width); // Scale and convert to half float. 
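The HalfFloatRow variants below multiply each 16-bit sample by a float scale and store the result as an IEEE half float; the F16C versions lean on hardware float-to-half conversion. The portable trick, sketched here under the assumption that the scaled value stays within half's normal range, is an exponent rebias: multiplying by 2^-112 shifts the float exponent into half-float range, after which the half encoding can be read straight out of the float's bit pattern.

    #include <stdint.h>
    #include <string.h>

    // Sketch only: float -> half via exponent rebias, truncating the mantissa.
    static void HalfFloatRowSketch(const uint16_t* src, uint16_t* dst,
                                   float scale, int width) {
      const float mult = scale * 1.9259299444e-34f;  // scale * 2^-112
      for (int i = 0; i < width; ++i) {
        float value = (float)src[i] * mult;
        uint32_t bits;
        memcpy(&bits, &value, sizeof(bits));  // dodge strict-aliasing UB
        dst[i] = (uint16_t)(bits >> 13);      // half sign/exponent/mantissa
      }
    }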
-void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_Any_SSE2(const uint16* src, - uint16* dst, - float scale, +void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width); +void HalfFloatRow_SSE2(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_SSE2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, int width); -void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_Any_AVX2(const uint16* src, - uint16* dst, - float scale, +void HalfFloatRow_AVX2(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, int width); -void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_Any_F16C(const uint16* src, - uint16* dst, +void HalfFloatRow_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_F16C(const uint16_t* src, + uint16_t* dst, float scale, int width); -void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float scale, int width); -void HalfFloat1Row_Any_F16C(const uint16* src, - uint16* dst, +void HalfFloat1Row_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_Any_F16C(const uint16_t* src, + uint16_t* dst, float scale, int width); -void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_Any_NEON(const uint16* src, - uint16* dst, - float scale, +void HalfFloatRow_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, int width); -void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float scale, int width); -void HalfFloat1Row_Any_NEON(const uint16* src, - uint16* dst, - float scale, +void HalfFloat1Row_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, int width); -void HalfFloatRow_MSA(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_Any_MSA(const uint16* src, - uint16* dst, - float scale, +void HalfFloatRow_MSA(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_MSA(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, int width); -void ARGBLumaColorTableRow_C(const uint8* src_argb, - uint8* dst_argb, +void ARGBLumaColorTableRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, - uint32 lumacoeff); -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, + const uint8_t* luma, + uint32_t lumacoeff); +void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, - uint32 lumacoeff); + const uint8_t* luma, + uint32_t lumacoeff); float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width); float ScaleMaxSamples_NEON(const float* src, diff --git a/chromium/third_party/libyuv/include/libyuv/scale.h b/chromium/third_party/libyuv/include/libyuv/scale.h index 6d6b9a8583a..b937d348cab 100644 --- a/chromium/third_party/libyuv/include/libyuv/scale.h +++ b/chromium/third_party/libyuv/include/libyuv/scale.h @@ -28,22 +28,22 @@ typedef enum FilterMode { // Scale a YUV plane. 
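A quick usage sketch of the ScalePlane entry point declared just below, with the new uint8_t pointer types (the wrapper function and buffer handling here are hypothetical):

    #include <vector>
    #include "libyuv/scale.h"

    // Halve a single luma plane with bilinear filtering.
    void HalvePlane(const uint8_t* src, int src_w, int src_h,
                    std::vector<uint8_t>* out) {
      const int dst_w = src_w / 2;
      const int dst_h = src_h / 2;
      out->resize((size_t)dst_w * dst_h);
      libyuv::ScalePlane(src, /*src_stride=*/src_w, src_w, src_h,
                         out->data(), /*dst_stride=*/dst_w, dst_w, dst_h,
                         libyuv::kFilterBilinear);
    }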
LIBYUV_API -void ScalePlane(const uint8* src, +void ScalePlane(const uint8_t* src, int src_stride, int src_width, int src_height, - uint8* dst, + uint8_t* dst, int dst_stride, int dst_width, int dst_height, enum FilterMode filtering); LIBYUV_API -void ScalePlane_16(const uint16* src, +void ScalePlane_16(const uint16_t* src, int src_stride, int src_width, int src_height, - uint16* dst, + uint16_t* dst, int dst_stride, int dst_width, int dst_height, @@ -60,38 +60,38 @@ void ScalePlane_16(const uint16* src, // Returns 0 if successful. LIBYUV_API -int I420Scale(const uint8* src_y, +int I420Scale(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_width, int src_height, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int dst_width, int dst_height, enum FilterMode filtering); LIBYUV_API -int I420Scale_16(const uint16* src_y, +int I420Scale_16(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, int src_width, int src_height, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, - uint16* dst_u, + uint16_t* dst_u, int dst_stride_u, - uint16* dst_v, + uint16_t* dst_v, int dst_stride_v, int dst_width, int dst_height, @@ -100,17 +100,17 @@ int I420Scale_16(const uint16* src_y, #ifdef __cplusplus // Legacy API. Deprecated. LIBYUV_API -int Scale(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, +int Scale(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, int src_stride_y, int src_stride_u, int src_stride_v, int src_width, int src_height, - uint8* dst_y, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_y, + uint8_t* dst_u, + uint8_t* dst_v, int dst_stride_y, int dst_stride_u, int dst_stride_v, @@ -118,17 +118,6 @@ int Scale(const uint8* src_y, int dst_height, LIBYUV_BOOL interpolate); -// Legacy API. Deprecated. -LIBYUV_API -int ScaleOffset(const uint8* src_i420, - int src_width, - int src_height, - uint8* dst_i420, - int dst_width, - int dst_height, - int dst_yoffset, - LIBYUV_BOOL interpolate); - // For testing, allow disabling of specialized scalers. LIBYUV_API void SetUseReferenceImpl(LIBYUV_BOOL use); diff --git a/chromium/third_party/libyuv/include/libyuv/scale_argb.h b/chromium/third_party/libyuv/include/libyuv/scale_argb.h index 3d25e579cde..7641f18e341 100644 --- a/chromium/third_party/libyuv/include/libyuv/scale_argb.h +++ b/chromium/third_party/libyuv/include/libyuv/scale_argb.h @@ -20,11 +20,11 @@ extern "C" { #endif LIBYUV_API -int ARGBScale(const uint8* src_argb, +int ARGBScale(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, @@ -32,11 +32,11 @@ int ARGBScale(const uint8* src_argb, // Clipped scale takes destination rectangle coordinates for clip values. LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, +int ARGBScaleClip(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, @@ -48,18 +48,18 @@ int ARGBScaleClip(const uint8* src_argb, // Scale with YUV conversion to ARGB and clipping. 
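Both ARGBScaleClip above and YUVToARGBScaleClip below scale the whole source conceptually but only write the destination rectangle given by the clip values, which is what makes tiled or partial redraws cheap. A hedged usage sketch, assuming the usual clip_x/clip_y/clip_width/clip_height tail parameters that sit in the unchanged part of this header:

    #include "libyuv/scale_argb.h"

    // Scale src into a 1280x720 ARGB destination, but only compute the
    // 320x180 tile whose top-left destination corner is (640, 360).
    int ScaleTile(const uint8_t* src_argb, int src_w, int src_h,
                  uint8_t* dst_argb) {
      return libyuv::ARGBScaleClip(src_argb, src_w * 4, src_w, src_h,
                                   dst_argb, 1280 * 4, 1280, 720,
                                   /*clip_x=*/640, /*clip_y=*/360,
                                   /*clip_width=*/320, /*clip_height=*/180,
                                   libyuv::kFilterBox);
    }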
LIBYUV_API -int YUVToARGBScaleClip(const uint8* src_y, +int YUVToARGBScaleClip(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint32 src_fourcc, + uint32_t src_fourcc, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - uint32 dst_fourcc, + uint32_t dst_fourcc, int dst_width, int dst_height, int clip_x, diff --git a/chromium/third_party/libyuv/include/libyuv/scale_row.h b/chromium/third_party/libyuv/include/libyuv/scale_row.h index 3db46d399ea..7194ba09f84 100644 --- a/chromium/third_party/libyuv/include/libyuv/scale_row.h +++ b/chromium/third_party/libyuv/include/libyuv/scale_row.h @@ -19,17 +19,20 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif - // GCC >= 4.7.0 required for AVX2. #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) @@ -81,7 +84,7 @@ extern "C" { #endif // The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SCALEARGBCOLS_NEON #define HAS_SCALEARGBROWDOWN2_NEON @@ -113,8 +116,8 @@ void ScalePlaneVertical(int src_height, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int y, int dy, @@ -126,8 +129,8 @@ void ScalePlaneVertical_16(int src_height, int dst_height, int src_stride, int dst_stride, - const uint16* src_argb, - uint16* dst_argb, + const uint16_t* src_argb, + uint16_t* dst_argb, int x, int y, int dy, @@ -166,425 +169,431 @@ void ScaleSlope(int src_width, int* dx, int* dy); -void ScaleRowDown2_C(const uint8* src_ptr, +void ScaleRowDown2_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown2_16_C(const uint16* src_ptr, +void ScaleRowDown2_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -void ScaleRowDown2Linear_C(const uint8* src_ptr, +void ScaleRowDown2Linear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown2Linear_16_C(const uint16* src_ptr, +void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -void ScaleRowDown2Box_C(const uint8* src_ptr, +void ScaleRowDown2Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, +void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown2Box_16_C(const uint16* src_ptr, +void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -void 
ScaleRowDown4_C(const uint8* src_ptr, +void ScaleRowDown4_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown4_16_C(const uint16* src_ptr, +void ScaleRowDown4_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -void ScaleRowDown4Box_C(const uint8* src_ptr, +void ScaleRowDown4Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown4Box_16_C(const uint16* src_ptr, +void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -void ScaleRowDown34_C(const uint8* src_ptr, +void ScaleRowDown34_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown34_16_C(const uint16* src_ptr, +void ScaleRowDown34_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, +void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* d, + uint8_t* d, int dst_width); -void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, +void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* d, + uint16_t* d, int dst_width); -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, +void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* d, + uint8_t* d, int dst_width); -void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, +void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* d, + uint16_t* d, int dst_width); -void ScaleCols_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleCols_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, int x, int dx); -void ScaleColsUp2_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleColsUp2_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int, int); -void ScaleColsUp2_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleColsUp2_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, int, int); -void ScaleFilterCols_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleFilterCols_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleFilterCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, int x, int dx); -void ScaleFilterCols64_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols64_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, - int x, + int x32, int dx); -void ScaleFilterCols64_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleFilterCols64_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, - int x, + int x32, int dx); -void ScaleRowDown38_C(const uint8* src_ptr, +void ScaleRowDown38_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown38_16_C(const uint16* src_ptr, +void ScaleRowDown38_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, +void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void 
ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, +void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, + uint16_t* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, +void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, +void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, + uint16_t* dst_ptr, int dst_width); -void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width); -void ScaleARGBRowDown2_C(const uint8* src_argb, +void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_16_C(const uint16_t* src_ptr, + uint32_t* dst_ptr, + int src_width); +void ScaleARGBRowDown2_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, +void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_C(const uint8* src_argb, +void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEven_C(const uint8* src_argb, +void ScaleARGBRowDownEven_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBCols_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBCols64_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, - int x, + int x32, int dx); -void ScaleARGBColsUp2_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBColsUp2_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int, int); -void ScaleARGBFilterCols_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBFilterCols64_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, - int x, + int x32, int dx); // Specialized scalers for x86. 
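Before the x86-specific declarations, a note on the naming convention used throughout this header: ScaleRowDown2 with no suffix point-samples (keeps one pixel of each horizontal pair), Linear averages horizontal pairs, and Box averages full 2x2 blocks across two input rows. A compact sketch of the plain and Box kernels, for orientation only (the C declarations above are the normative ones; which pixel the point sampler keeps follows the C kernel):

    #include <stddef.h>
    #include <stdint.h>

    // Point sample: keep the second pixel of every horizontal pair.
    static void RowDown2Sketch(const uint8_t* src, uint8_t* dst, int dst_w) {
      for (int x = 0; x < dst_w; ++x) {
        dst[x] = src[2 * x + 1];
      }
    }

    // Box: average each 2x2 block across this row and the next, rounded.
    static void RowDown2BoxSketch(const uint8_t* src, ptrdiff_t stride,
                                  uint8_t* dst, int dst_w) {
      const uint8_t* s = src;
      const uint8_t* t = src + stride;
      for (int x = 0; x < dst_w; ++x) {
        dst[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
        s += 2;
        t += 2;
      }
    }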
-void ScaleRowDown2_SSSE3(const uint8* src_ptr, +void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, +void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2_AVX2(const uint8* src_ptr, +void ScaleRowDown2_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, +void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_AVX2(const uint8* src_ptr, +void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4_SSSE3(const uint8* src_ptr, +void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4_AVX2(const uint8* src_ptr, +void ScaleRowDown4_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4Box_AVX2(const uint8* src_ptr, +void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown38_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown2Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, +void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* 
src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, +void ScaleRowDown2_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, +void ScaleRowDown2Linear_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, +void ScaleRowDown2Box_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, +void ScaleRowDown2Box_Odd_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown4_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown4Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, +void ScaleRowDown4_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, +void ScaleRowDown4Box_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width); - -void ScaleFilterCols_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); + +void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int 
dx); -void ScaleColsUp2_SSE2(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleColsUp2_SSE2(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); // ARGB Column functions -void ScaleARGBCols_SSE2(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBColsUp2_SSE2(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBFilterCols_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBCols_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleARGBCols_Any_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleARGBFilterCols_MSA(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBCols_MSA(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx); -void ScaleARGBFilterCols_Any_MSA(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleARGBCols_Any_MSA(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); // ARGB Row functions -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, +void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, @@ -598,225 +607,227 @@ void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, +void 
ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Linear_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Box_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb, +void ScaleARGBRowDown2Linear_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_argb, +void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_argb, +void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_argb, +void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, +void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, +void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, int32_t src_stepx, uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, +void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, +void ScaleARGBRowDownEven_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDownEvenBox_Any_NEON(const 
uint8* src_argb, +void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_argb, +void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, int32_t src_stepx, - uint8_t* dst_argb, + uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDownEvenBox_Any_MSA(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_ptr, int dst_width); // ScaleRowDown2Box also used by planar functions // NEON downscalers with interpolation. // Note - not static due to reuse in convert for 444 to 420. -void ScaleRowDown2_NEON(const uint8* src_ptr, +void ScaleRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown2Linear_NEON(const uint8* src_ptr, +void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown2Box_NEON(const uint8* src_ptr, +void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width); -void ScaleRowDown4_NEON(const uint8* src_ptr, +void ScaleRowDown4_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4Box_NEON(const uint8* src_ptr, +void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. 
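Restating the comment above: the NEON kernel uses multi-lane (vld4-style) loads to de-interleave each group of 4 source pixels into 4 registers, keeps 3 of them, and so turns 32 input pixels into 24 outputs per iteration. The scalar pattern it vectorizes looks roughly like this (which positions are kept is taken from the C kernel; treat as a sketch, and note dst_w is assumed to be a multiple of 3):

    #include <stdint.h>

    // 4 -> 3 point-sample: from each group of 4 pixels keep 0, 1 and 3.
    static void RowDown34Sketch(const uint8_t* src, uint8_t* dst, int dst_w) {
      for (int x = 0; x < dst_w; x += 3) {
        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[3];
        dst += 3;
        src += 4;
      }
    }

The _0_Box and _1_Box variants declared next add vertical filtering on top of this, blending the two source rows with roughly 3/4:1/4 and 1/2:1/2 weights before the horizontal selection.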
-void ScaleRowDown34_NEON(const uint8* src_ptr, +void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); // 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, +void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); // 32x3 -> 12x1 -void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2_Any_NEON(const uint8* src_ptr, +void ScaleRowDown2_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, +void ScaleRowDown2Linear_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, +void ScaleRowDown2Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, +void ScaleRowDown2Box_Odd_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4_Any_NEON(const uint8* src_ptr, +void ScaleRowDown4_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, +void ScaleRowDown4Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_Any_NEON(const uint8* src_ptr, +void ScaleRowDown34_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, +void ScaleRowDown34_0_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, +void ScaleRowDown34_1_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); // 32 -> 12 -void ScaleRowDown38_Any_NEON(const uint8* src_ptr, +void ScaleRowDown38_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); // 32x3 -> 12x1 -void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, +void ScaleRowDown38_3_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, +void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* 
dst_ptr, int src_width); +void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_Any_NEON(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); -void ScaleFilterCols_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleFilterCols_Any_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); @@ -854,47 +865,47 @@ void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width); void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); -void ScaleFilterCols_MSA(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleRowDown34_MSA(const uint8* src_ptr, +void ScaleRowDown34_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst, int dst_width); -void ScaleRowDown34_0_Box_MSA(const uint8* src_ptr, +void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* d, int dst_width); -void ScaleRowDown34_1_Box_MSA(const uint8* src_ptr, +void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* d, int dst_width); void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst, + uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst, + uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst, + uint8_t* dst_ptr, int dst_width); void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst, + uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst, + uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst, + uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, @@ -907,22 +918,22 @@ void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr, void ScaleAddRow_Any_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); -void ScaleFilterCols_Any_MSA(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx); -void ScaleRowDown34_Any_MSA(const uint8* src_ptr, +void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_Any_MSA(const uint8* src_ptr, +void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_Any_MSA(const uint8* src_ptr, +void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); #ifdef __cplusplus diff --git a/chromium/third_party/libyuv/include/libyuv/version.h b/chromium/third_party/libyuv/include/libyuv/version.h index 97595e58ffc..683ac0482b4 100644 --- a/chromium/third_party/libyuv/include/libyuv/version.h +++ b/chromium/third_party/libyuv/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ 
-#define LIBYUV_VERSION 1688 +#define LIBYUV_VERSION 1698 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/chromium/third_party/libyuv/include/libyuv/video_common.h b/chromium/third_party/libyuv/include/libyuv/video_common.h index e3c180f167d..bcef378b5a4 100644 --- a/chromium/third_party/libyuv/include/libyuv/video_common.h +++ b/chromium/third_party/libyuv/include/libyuv/video_common.h @@ -28,13 +28,13 @@ extern "C" { // Needs to be a macro otherwise the OS X compiler complains when the kFormat* // constants are used in a switch. #ifdef __cplusplus -#define FOURCC(a, b, c, d) \ - ((static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \ - (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24)) +#define FOURCC(a, b, c, d) \ + ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \ + (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24)) #else -#define FOURCC(a, b, c, d) \ - (((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \ - ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */ +#define FOURCC(a, b, c, d) \ + (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \ + ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */ #endif // Some pages discussing FourCC codes: @@ -63,11 +63,12 @@ enum FourCC { // 1 Secondary YUV format: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), - // 10 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc + // 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010. + FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), @@ -137,6 +138,7 @@ enum FourCCBpp { FOURCC_BPP_ABGR = 32, FOURCC_BPP_RGBA = 32, FOURCC_BPP_AR30 = 32, + FOURCC_BPP_AB30 = 32, FOURCC_BPP_24BG = 24, FOURCC_BPP_RAW = 24, FOURCC_BPP_RGBP = 16, @@ -176,7 +178,7 @@ enum FourCCBpp { }; // Converts fourcc aliases into canonical ones. -LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc); +LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libyuv/source/compare.cc b/chromium/third_party/libyuv/source/compare.cc index 8c379b59cb8..50e3abd0556 100644 --- a/chromium/third_party/libyuv/source/compare.cc +++ b/chromium/third_party/libyuv/source/compare.cc @@ -29,10 +29,11 @@ extern "C" { // hash seed of 5381 recommended. 
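For reference, the scalar kernel that the SSE41 and NEON paths accelerate here is plain Bernstein djb2, hash = hash * 33 + byte, which is why a seed of 5381 is recommended. A self-contained sketch matching the HashDjb2_C semantics (helper name is ours, not libyuv's):

    #include <stdint.h>

    // djb2 over a byte buffer.
    static uint32_t HashDjb2Sketch(const uint8_t* src, int count,
                                   uint32_t seed) {
      uint32_t hash = seed;
      for (int i = 0; i < count; ++i) {
        hash = hash * 33u + src[i];
      }
      return hash;
    }

The block loop in HashDjb2 below exists so the SIMD kernel can run on 32 KB chunks with an int count while the public API accepts a uint64_t count.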
LIBYUV_API -uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { +uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) { const int kBlockSize = 1 << 15; // 32768; int remainder; - uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; + uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) = + HashDjb2_C; #if defined(HAS_HASHDJB2_SSE41) if (TestCpuFlag(kCpuHasSSE41)) { HashDjb2_SSE = HashDjb2_SSE41; @@ -44,7 +45,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { } #endif - while (count >= (uint64)(kBlockSize)) { + while (count >= (uint64_t)(kBlockSize)) { seed = HashDjb2_SSE(src, kBlockSize, seed); src += kBlockSize; count -= kBlockSize; @@ -62,7 +63,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { return seed; } -static uint32 ARGBDetectRow_C(const uint8* argb, int width) { +static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) { int x; for (x = 0; x < width - 1; x += 2) { if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. @@ -93,8 +94,11 @@ static uint32 ARGBDetectRow_C(const uint8* argb, int width) { // Scan an opaque argb image and return fourcc based on alpha offset. // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. LIBYUV_API -uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) { - uint32 fourcc = 0; +uint32_t ARGBDetect(const uint8_t* argb, + int stride_argb, + int width, + int height) { + uint32_t fourcc = 0; int h; // Coalesce rows. @@ -114,17 +118,17 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) { // So actual maximum is 1 less loop, which is 64436 - 32 bytes. LIBYUV_API -uint64 ComputeHammingDistance(const uint8* src_a, - const uint8* src_b, - int count) { +uint64_t ComputeHammingDistance(const uint8_t* src_a, + const uint8_t* src_b, + int count) { const int kBlockSize = 1 << 15; // 32768; const int kSimdSize = 64; // SIMD for multiple of 64, and C for remainder int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1); - uint64 diff = 0; + uint64_t diff = 0; int i; - uint32 (*HammingDistance)(const uint8* src_a, const uint8* src_b, int count) = - HammingDistance_C; + uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b, + int count) = HammingDistance_C; #if defined(HAS_HAMMINGDISTANCE_NEON) if (TestCpuFlag(kCpuHasNEON)) { HammingDistance = HammingDistance_NEON; @@ -172,18 +176,18 @@ uint64 ComputeHammingDistance(const uint8* src_a, // TODO(fbarchard): Refactor into row function. LIBYUV_API -uint64 ComputeSumSquareError(const uint8* src_a, - const uint8* src_b, - int count) { +uint64_t ComputeSumSquareError(const uint8_t* src_a, + const uint8_t* src_b, + int count) { // SumSquareError returns values 0 to 65535 for each squared difference. - // Up to 65536 of those can be summed and remain within a uint32. - // After each block of 65536 pixels, accumulate into a uint64. + // Up to 65536 of those can be summed and remain within a uint32_t. + // After each block of 65536 pixels, accumulate into a uint64_t. 
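The block size in the comment above is chosen so a per-block accumulator cannot overflow: the largest squared byte difference is 255 * 255 = 65025, and 65025 * 65536 = 4261478400, which is below 2^32 = 4294967296. A compile-time restatement of that bound (C++11 static_assert, shown only to make the arithmetic explicit):

static_assert(65025ull * 65536ull < (1ull << 32),
              "one 65536-pixel block of squared byte differences fits uint32_t");

Each full block is summed in a uint32_t and then folded into the uint64_t total, which is exactly what the loop below does.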
const int kBlockSize = 65536; int remainder = count & (kBlockSize - 1) & ~31; - uint64 sse = 0; + uint64_t sse = 0; int i; - uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = - SumSquareError_C; + uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b, + int count) = SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) if (TestCpuFlag(kCpuHasNEON)) { SumSquareError = SumSquareError_NEON; @@ -227,13 +231,13 @@ uint64 ComputeSumSquareError(const uint8* src_a, } LIBYUV_API -uint64 ComputeSumSquareErrorPlane(const uint8* src_a, - int stride_a, - const uint8* src_b, - int stride_b, - int width, - int height) { - uint64 sse = 0; +uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height) { + uint64_t sse = 0; int h; // Coalesce rows. if (stride_a == width && stride_b == width) { @@ -250,7 +254,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, } LIBYUV_API -double SumSquareErrorToPsnr(uint64 sse, uint64 count) { +double SumSquareErrorToPsnr(uint64_t sse, uint64_t count) { double psnr; if (sse > 0) { double mse = (double)count / (double)sse; @@ -259,65 +263,67 @@ double SumSquareErrorToPsnr(uint64 sse, uint64 count) { psnr = kMaxPsnr; // Limit to prevent divide by 0 } - if (psnr > kMaxPsnr) + if (psnr > kMaxPsnr) { psnr = kMaxPsnr; + } return psnr; } LIBYUV_API -double CalcFramePsnr(const uint8* src_a, +double CalcFramePsnr(const uint8_t* src_a, int stride_a, - const uint8* src_b, + const uint8_t* src_b, int stride_b, int width, int height) { - const uint64 samples = width * height; - const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b, - stride_b, width, height); + const uint64_t samples = (uint64_t)width * (uint64_t)height; + const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b, + stride_b, width, height); return SumSquareErrorToPsnr(sse, samples); } LIBYUV_API -double I420Psnr(const uint8* src_y_a, +double I420Psnr(const uint8_t* src_y_a, int stride_y_a, - const uint8* src_u_a, + const uint8_t* src_u_a, int stride_u_a, - const uint8* src_v_a, + const uint8_t* src_v_a, int stride_v_a, - const uint8* src_y_b, + const uint8_t* src_y_b, int stride_y_b, - const uint8* src_u_b, + const uint8_t* src_u_b, int stride_u_b, - const uint8* src_v_b, + const uint8_t* src_v_b, int stride_v_b, int width, int height) { - const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, src_y_b, - stride_y_b, width, height); + const uint64_t sse_y = ComputeSumSquareErrorPlane( + src_y_a, stride_y_a, src_y_b, stride_y_b, width, height); const int width_uv = (width + 1) >> 1; const int height_uv = (height + 1) >> 1; - const uint64 sse_u = ComputeSumSquareErrorPlane( + const uint64_t sse_u = ComputeSumSquareErrorPlane( src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv); - const uint64 sse_v = ComputeSumSquareErrorPlane( + const uint64_t sse_v = ComputeSumSquareErrorPlane( src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv); - const uint64 samples = width * height + 2 * (width_uv * height_uv); - const uint64 sse = sse_y + sse_u + sse_v; + const uint64_t samples = (uint64_t)width * (uint64_t)height + + 2 * ((uint64_t)width_uv * (uint64_t)height_uv); + const uint64_t sse = sse_y + sse_u + sse_v; return SumSquareErrorToPsnr(sse, samples); } -static const int64 cc1 = 26634; // (64^2*(.01*255)^2 -static const int64 cc2 = 239708; // (64^2*(.03*255)^2 +static const int64_t cc1 = 26634; // (64^2*(.01*255)^2 +static const int64_t 
cc2 = 239708; // (64^2*(.03*255)^2 -static double Ssim8x8_C(const uint8* src_a, +static double Ssim8x8_C(const uint8_t* src_a, int stride_a, - const uint8* src_b, + const uint8_t* src_b, int stride_b) { - int64 sum_a = 0; - int64 sum_b = 0; - int64 sum_sq_a = 0; - int64 sum_sq_b = 0; - int64 sum_axb = 0; + int64_t sum_a = 0; + int64_t sum_b = 0; + int64_t sum_sq_a = 0; + int64_t sum_sq_b = 0; + int64_t sum_axb = 0; int i; for (i = 0; i < 8; ++i) { @@ -335,20 +341,20 @@ static double Ssim8x8_C(const uint8* src_a, } { - const int64 count = 64; + const int64_t count = 64; // scale the constants by number of pixels - const int64 c1 = (cc1 * count * count) >> 12; - const int64 c2 = (cc2 * count * count) >> 12; + const int64_t c1 = (cc1 * count * count) >> 12; + const int64_t c2 = (cc2 * count * count) >> 12; - const int64 sum_a_x_sum_b = sum_a * sum_b; + const int64_t sum_a_x_sum_b = sum_a * sum_b; - const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * - (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); + const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) * + (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); - const int64 sum_a_sq = sum_a * sum_a; - const int64 sum_b_sq = sum_b * sum_b; + const int64_t sum_a_sq = sum_a * sum_a; + const int64_t sum_b_sq = sum_b * sum_b; - const int64 ssim_d = + const int64_t ssim_d = (sum_a_sq + sum_b_sq + c1) * (count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2); @@ -363,15 +369,15 @@ static double Ssim8x8_C(const uint8* src_a, // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. LIBYUV_API -double CalcFrameSsim(const uint8* src_a, +double CalcFrameSsim(const uint8_t* src_a, int stride_a, - const uint8* src_b, + const uint8_t* src_b, int stride_b, int width, int height) { int samples = 0; double ssim_total = 0; - double (*Ssim8x8)(const uint8* src_a, int stride_a, const uint8* src_b, + double (*Ssim8x8)(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b) = Ssim8x8_C; // sample point start with each 4x4 location @@ -392,17 +398,17 @@ double CalcFrameSsim(const uint8* src_a, } LIBYUV_API -double I420Ssim(const uint8* src_y_a, +double I420Ssim(const uint8_t* src_y_a, int stride_y_a, - const uint8* src_u_a, + const uint8_t* src_u_a, int stride_u_a, - const uint8* src_v_a, + const uint8_t* src_v_a, int stride_v_a, - const uint8* src_y_b, + const uint8_t* src_y_b, int stride_y_b, - const uint8* src_u_b, + const uint8_t* src_u_b, int stride_u_b, - const uint8* src_v_b, + const uint8_t* src_v_b, int stride_v_b, int width, int height) { diff --git a/chromium/third_party/libyuv/source/compare_common.cc b/chromium/third_party/libyuv/source/compare_common.cc index 83564a1bcb5..633466addb5 100644 --- a/chromium/third_party/libyuv/source/compare_common.cc +++ b/chromium/third_party/libyuv/source/compare_common.cc @@ -18,8 +18,10 @@ extern "C" { #endif #if ORIGINAL_OPT -uint32 HammingDistance_C1(const uint8* src_a, const uint8* src_b, int count) { - uint32 diff = 0u; +uint32_t HammingDistance_C1(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; int i; for (i = 0; i < count; ++i) { @@ -46,13 +48,15 @@ uint32 HammingDistance_C1(const uint8* src_a, const uint8* src_b, int count) { #endif // Hakmem method for hamming distance. 
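The "Hakmem method" named above is the classic bit-parallel (SWAR) population count applied to the XOR of the two inputs: Hamming distance = popcount(a ^ b). A standalone sketch of the 32-bit reduction that the loop below inlines (the helper name is illustrative):

#include <stdint.h>

static uint32_t Popcount32(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                  // 16 x 2-bit counts
  x = ((x >> 2) & 0x33333333u) + (x & 0x33333333u);  // 8 x 4-bit counts
  x = (x + (x >> 4)) & 0x0f0f0f0fu;                  // 4 x per-byte counts
  return (x * 0x01010101u) >> 24;                    // sum bytes into top byte
}

For example Popcount32(0xFFFFFFFFu) yields 32, so two fully complementary words contribute 32 to the distance.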
-uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) { - uint32 diff = 0u; +uint32_t HammingDistance_C(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; int i; for (i = 0; i < count - 3; i += 4) { - uint32 x = *((uint32*)src_a) ^ *((uint32*)src_b); - uint32 u = x - ((x >> 1) & 0x55555555); + uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT + uint32_t u = x - ((x >> 1) & 0x55555555); u = ((u >> 2) & 0x33333333) + (u & 0x33333333); diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24); src_a += 4; @@ -60,8 +64,8 @@ uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) { } for (; i < count; ++i) { - uint32 x = *src_a ^ *src_b; - uint32 u = x - ((x >> 1) & 0x55); + uint32_t x = *src_a ^ *src_b; + uint32_t u = x - ((x >> 1) & 0x55); u = ((u >> 2) & 0x33) + (u & 0x33); diff += (u + (u >> 4)) & 0x0f; src_a += 1; @@ -71,20 +75,22 @@ uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) { return diff; } -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse = 0u; +uint32_t SumSquareError_C(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse = 0u; int i; for (i = 0; i < count; ++i) { int diff = src_a[i] - src_b[i]; - sse += (uint32)(diff * diff); + sse += (uint32_t)(diff * diff); } return sse; } // hash seed of 5381 recommended. // Internal C version of HashDjb2 with int sized count for efficiency. -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) { - uint32 hash = seed; +uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) { + uint32_t hash = seed; int i; for (i = 0; i < count; ++i) { hash += (hash << 5) + src[i]; diff --git a/chromium/third_party/libyuv/source/compare_gcc.cc b/chromium/third_party/libyuv/source/compare_gcc.cc index 49b471af1a0..676527c1b1b 100644 --- a/chromium/third_party/libyuv/source/compare_gcc.cc +++ b/chromium/third_party/libyuv/source/compare_gcc.cc @@ -23,10 +23,10 @@ extern "C" { (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) #if defined(__x86_64__) -uint32 HammingDistance_SSE42(const uint8* src_a, - const uint8* src_b, - int count) { - uint64 diff = 0u; +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint64_t diff = 0u; asm volatile( "xor %3,%3 \n" @@ -68,13 +68,13 @@ uint32 HammingDistance_SSE42(const uint8* src_a, : : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10"); - return static_cast<uint32>(diff); + return static_cast<uint32_t>(diff); } #else -uint32 HammingDistance_SSE42(const uint8* src_a, - const uint8* src_b, - int count) { - uint32 diff = 0u; +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; asm volatile( // Process 16 bytes per loop. 
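The SSSE3 kernel that follows takes a different route to the same popcount: kNibbleMask isolates low and high nibbles, and pshufb uses kBitCount as a 16-entry lookup table of bit counts. A scalar model of that table-driven approach (illustrative only; the real kernel processes 16 bytes per pshufb):

#include <stdint.h>

static const uint8_t kBitCountLUT[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                         1, 2, 2, 3, 2, 3, 3, 4};

static uint32_t PopcountNibbleLUT(uint32_t x) {
  uint32_t n = 0;
  for (int i = 0; i < 8; ++i) {  // a 32-bit word holds 8 nibbles
    n += kBitCountLUT[x & 15u];
    x >>= 4;
  }
  return n;
}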
@@ -115,10 +115,10 @@ static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15}; static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; -uint32 HammingDistance_SSSE3(const uint8* src_a, - const uint8* src_b, - int count) { - uint32 diff = 0u; +uint32_t HammingDistance_SSSE3(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; asm volatile( "movdqa %4,%%xmm2 \n" @@ -174,8 +174,10 @@ uint32 HammingDistance_SSSE3(const uint8* src_a, } #ifdef HAS_HAMMINGDISTANCE_AVX2 -uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) { - uint32 diff = 0u; +uint32_t HammingDistance_AVX2(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; asm volatile( "vbroadcastf128 %4,%%ymm2 \n" @@ -227,43 +229,46 @@ uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) { } #endif // HAS_HAMMINGDISTANCE_AVX2 -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; - asm volatile ( - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10, 1) ",%1 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "sub $0x10,%2 \n" - "jg 1b \n" +uint32_t SumSquareError_SSE2(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse; + asm volatile( + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm5 \n" - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "movdqu (%1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm1,%%xmm3 \n" + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "sub $0x10,%2 \n" + "jg 1b \n" - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" + + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); return sse; } @@ -293,58 +298,58 @@ static const uvec32 kHashMul3 = { 0x00000001, // 33 ^ 0 }; -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - uint32 hash; - asm volatile ( - "movd %2,%%xmm0 \n" - "pxor %%xmm7,%%xmm7 \n" - "movdqa %4,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "pmulld %%xmm6,%%xmm0 \n" - "movdqa %5,%%xmm5 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm7,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm7,%%xmm3 \n" - "pmulld %%xmm5,%%xmm3 \n" - "movdqa %6,%%xmm5 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpckhwd %%xmm7,%%xmm4 \n" - "pmulld %%xmm5,%%xmm4 \n" - "movdqa %7,%%xmm5 
\n" - "punpckhbw %%xmm7,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm7,%%xmm2 \n" - "pmulld %%xmm5,%%xmm2 \n" - "movdqa %8,%%xmm5 \n" - "punpckhwd %%xmm7,%%xmm1 \n" - "pmulld %%xmm5,%%xmm1 \n" - "paddd %%xmm4,%%xmm3 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm1 \n" - "pshufd $0xe,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "pshufd $0x1,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "sub $0x10,%1 \n" - "jg 1b \n" - "movd %%xmm0,%3 \n" - : "+r"(src), // %0 - "+r"(count), // %1 - "+rm"(seed), // %2 - "=g"(hash) // %3 - : "m"(kHash16x33), // %4 - "m"(kHashMul0), // %5 - "m"(kHashMul1), // %6 - "m"(kHashMul2), // %7 - "m"(kHashMul3) // %8 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); +uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) { + uint32_t hash; + asm volatile( + "movd %2,%%xmm0 \n" + "pxor %%xmm7,%%xmm7 \n" + "movdqa %4,%%xmm6 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "pmulld %%xmm6,%%xmm0 \n" + "movdqa %5,%%xmm5 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm7,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm7,%%xmm3 \n" + "pmulld %%xmm5,%%xmm3 \n" + "movdqa %6,%%xmm5 \n" + "movdqa %%xmm2,%%xmm4 \n" + "punpckhwd %%xmm7,%%xmm4 \n" + "pmulld %%xmm5,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "punpckhbw %%xmm7,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm7,%%xmm2 \n" + "pmulld %%xmm5,%%xmm2 \n" + "movdqa %8,%%xmm5 \n" + "punpckhwd %%xmm7,%%xmm1 \n" + "pmulld %%xmm5,%%xmm1 \n" + "paddd %%xmm4,%%xmm3 \n" + "paddd %%xmm2,%%xmm1 \n" + "paddd %%xmm3,%%xmm1 \n" + "pshufd $0xe,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "pshufd $0x1,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "sub $0x10,%1 \n" + "jg 1b \n" + "movd %%xmm0,%3 \n" + : "+r"(src), // %0 + "+r"(count), // %1 + "+rm"(seed), // %2 + "=g"(hash) // %3 + : "m"(kHash16x33), // %4 + "m"(kHashMul0), // %5 + "m"(kHashMul1), // %6 + "m"(kHashMul2), // %7 + "m"(kHashMul3) // %8 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); return hash; } #endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) diff --git a/chromium/third_party/libyuv/source/compare_msa.cc b/chromium/third_party/libyuv/source/compare_msa.cc index 57857cf5127..0b807d37bee 100644 --- a/chromium/third_party/libyuv/source/compare_msa.cc +++ b/chromium/third_party/libyuv/source/compare_msa.cc @@ -22,8 +22,10 @@ namespace libyuv { extern "C" { #endif -uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count) { - uint32 diff = 0u; +uint32_t HammingDistance_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; int i; v16u8 src0, src1, src2, src3; v2i64 vec0 = {0}, vec1 = {0}; @@ -42,13 +44,15 @@ uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count) { } vec0 += vec1; - diff = (uint32)__msa_copy_u_w((v4i32)vec0, 0); - diff += (uint32)__msa_copy_u_w((v4i32)vec0, 2); + diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0); + diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2); return diff; } -uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse = 0u; +uint32_t SumSquareError_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse = 0u; int i; v16u8 src0, src1, src2, src3; v8i16 vec0, vec1, vec2, vec3; @@ -80,8 +84,8 @@ uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count) { reg2 += 
reg3; reg0 += reg2; tmp0 = __msa_hadd_s_d(reg0, reg0); - sse = (uint32)__msa_copy_u_w((v4i32)tmp0, 0); - sse += (uint32)__msa_copy_u_w((v4i32)tmp0, 2); + sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0); + sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2); return sse; } diff --git a/chromium/third_party/libyuv/source/compare_neon.cc b/chromium/third_party/libyuv/source/compare_neon.cc index 5dfa71edcbf..2a2181e0cb3 100644 --- a/chromium/third_party/libyuv/source/compare_neon.cc +++ b/chromium/third_party/libyuv/source/compare_neon.cc @@ -23,8 +23,10 @@ extern "C" { // 256 bits at a time // uses short accumulator which restricts count to 131 KB -uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) { - uint32 diff; +uint32_t HammingDistance_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff; asm volatile( "vmov.u16 q4, #0 \n" // accumulator @@ -52,8 +54,10 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) { return diff; } -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; +uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse; asm volatile( "vmov.u8 q8, #0 \n" "vmov.u8 q10, #0 \n" diff --git a/chromium/third_party/libyuv/source/compare_neon64.cc b/chromium/third_party/libyuv/source/compare_neon64.cc index ddf98fa68b2..6e8f672ab73 100644 --- a/chromium/third_party/libyuv/source/compare_neon64.cc +++ b/chromium/third_party/libyuv/source/compare_neon64.cc @@ -22,8 +22,10 @@ extern "C" { // 256 bits at a time // uses short accumulator which restricts count to 131 KB -uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) { - uint32 diff; +uint32_t HammingDistance_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff; asm volatile( "movi v4.8h, #0 \n" @@ -47,8 +49,10 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) { return diff; } -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; +uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse; asm volatile( "eor v16.16b, v16.16b, v16.16b \n" "eor v18.16b, v18.16b, v18.16b \n" diff --git a/chromium/third_party/libyuv/source/compare_win.cc b/chromium/third_party/libyuv/source/compare_win.cc index bcd6a88ebbb..d57d3d9d1c8 100644 --- a/chromium/third_party/libyuv/source/compare_win.cc +++ b/chromium/third_party/libyuv/source/compare_win.cc @@ -25,14 +25,14 @@ extern "C" { // This module is for 32 bit Visual C x86 and clangcl #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) -uint32 HammingDistance_SSE42(const uint8* src_a, - const uint8* src_b, - int count) { - uint32 diff = 0u; +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; int i; for (i = 0; i < count - 3; i += 4) { - uint32 x = *((uint32*)src_a) ^ *((uint32*)src_b); + uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT src_a += 4; src_b += 4; diff += __popcnt(x); @@ -40,8 +40,8 @@ uint32 HammingDistance_SSE42(const uint8* src_a, return diff; } -__declspec(naked) uint32 - SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { +__declspec(naked) uint32_t + SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b @@ -81,8 +81,8 @@ __declspec(naked) 
uint32 #if _MSC_VER >= 1700 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. #pragma warning(disable : 4752) -__declspec(naked) uint32 - SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { +__declspec(naked) uint32_t + SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b @@ -146,8 +146,8 @@ uvec32 kHashMul3 = { 0x00000001, // 33 ^ 0 }; -__declspec(naked) uint32 - HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { +__declspec(naked) uint32_t + HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) { __asm { mov eax, [esp + 4] // src mov ecx, [esp + 8] // count @@ -197,8 +197,8 @@ __declspec(naked) uint32 // Visual C 2012 required for AVX2. #if _MSC_VER >= 1700 -__declspec(naked) uint32 - HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { +__declspec(naked) uint32_t + HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) { __asm { mov eax, [esp + 4] // src mov ecx, [esp + 8] // count diff --git a/chromium/third_party/libyuv/source/convert.cc b/chromium/third_party/libyuv/source/convert.cc index fd2066e29eb..375cc732c1d 100644 --- a/chromium/third_party/libyuv/source/convert.cc +++ b/chromium/third_party/libyuv/source/convert.cc @@ -28,17 +28,17 @@ static __inline int Abs(int v) { } // Any I4xx To I420 format with mirroring. -static int I4xxToI420(const uint8* src_y, +static int I4xxToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int src_y_width, int src_y_height, @@ -66,17 +66,17 @@ static int I4xxToI420(const uint8* src_y, // TODO(fbarchard): Use Scale plane which supports mirroring, but ensure // is does row coalescing. LIBYUV_API -int I420Copy(const uint8* src_y, +int I420Copy(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -108,17 +108,17 @@ int I420Copy(const uint8* src_y, // Copy I010 with optional flipping. LIBYUV_API -int I010Copy(const uint16* src_y, +int I010Copy(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, - uint16* dst_u, + uint16_t* dst_u, int dst_stride_u, - uint16* dst_v, + uint16_t* dst_v, int dst_stride_v, int width, int height) { @@ -150,17 +150,17 @@ int I010Copy(const uint16* src_y, // Convert 10 bit YUV to 8 bit. 
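I010 carries 10-bit samples in the low bits of each uint16_t, so the 8-bit conversion below amounts to keeping the top 8 of those 10 bits. A sketch of the per-sample arithmetic (libyuv's plane routine reaches the same result via a scale-and-shift, so treat this as the idea, not the implementation):

#include <stdint.h>

// 10-bit sample (0..1023) -> 8-bit sample (0..255).
static uint8_t Sample10To8(uint16_t v) {
  return (uint8_t)(v >> 2);  // drop the two least-significant bits
}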
LIBYUV_API -int I010ToI420(const uint16* src_y, +int I010ToI420(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -195,17 +195,17 @@ int I010ToI420(const uint16* src_y, // 422 chroma is 1/2 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API -int I422ToI420(const uint8* src_y, +int I422ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -218,17 +218,17 @@ int I422ToI420(const uint8* src_y, // 444 chroma is 1x width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API -int I444ToI420(const uint8* src_y, +int I444ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -239,13 +239,13 @@ int I444ToI420(const uint8* src_y, // I400 is greyscale typically used in MJPG LIBYUV_API -int I400ToI420(const uint8* src_y, +int I400ToI420(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -269,15 +269,15 @@ int I400ToI420(const uint8* src_y, return 0; } -static void CopyPlane2(const uint8* src, +static void CopyPlane2(const uint8_t* src, int src_stride_0, int src_stride_1, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { int y; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; + void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; @@ -320,16 +320,16 @@ static void CopyPlane2(const uint8* src, // src_stride_m420 is row planar. Normally this will be the width in pixels. // The UV plane is half width, but 2 values, so src_stride_m420 applies to // this as well as the two Y planes. -static int X420ToI420(const uint8* src_y, +static int X420ToI420(const uint8_t* src_y, int src_stride_y0, int src_stride_y1, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -384,15 +384,15 @@ static int X420ToI420(const uint8* src_y, // Convert NV12 to I420. 
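NV12 stores chroma as one interleaved half-resolution plane (a U byte then a V byte per sample pair), so converting to I420 is a deinterleave. A scalar sketch of one row split (libyuv's SplitUVPlane does this with SIMD row kernels):

#include <stdint.h>

static void SplitUVRowScalar(const uint8_t* src_uv,
                             uint8_t* dst_u,
                             uint8_t* dst_v,
                             int width) {
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[2 * x + 0];
    dst_v[x] = src_uv[2 * x + 1];
  }
}

NV21 is the same layout with V first, which is why NV21ToI420 below simply reuses the NV12 path with the U and V destinations exchanged.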
LIBYUV_API -int NV12ToI420(const uint8* src_y, +int NV12ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -403,15 +403,15 @@ int NV12ToI420(const uint8* src_y, // Convert NV21 to I420. Same as NV12 but u and v pointers swapped. LIBYUV_API -int NV21ToI420(const uint8* src_y, +int NV21ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_vu, + const uint8_t* src_vu, int src_stride_vu, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -422,13 +422,13 @@ int NV21ToI420(const uint8* src_y, // Convert M420 to I420. LIBYUV_API -int M420ToI420(const uint8* src_m420, +int M420ToI420(const uint8_t* src_m420, int src_stride_m420, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -440,20 +440,21 @@ int M420ToI420(const uint8* src_m420, // Convert YUY2 to I420. LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, +int YUY2ToI420(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_u, - uint8* dst_v, int width) = YUY2ToUVRow_C; - void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) = + void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2, + uint8_t* dst_u, uint8_t* dst_v, int width) = + YUY2ToUVRow_C; + void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; // Negative height means invert the image. if (height < 0) { @@ -520,20 +521,21 @@ int YUY2ToI420(const uint8* src_yuy2, // Convert UYVY to I420. LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, +int UYVYToI420(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_u, - uint8* dst_v, int width) = UYVYToUVRow_C; - void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int width) = + void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy, + uint8_t* dst_u, uint8_t* dst_v, int width) = + UYVYToUVRow_C; + void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = UYVYToYRow_C; // Negative height means invert the image. if (height < 0) { @@ -600,20 +602,21 @@ int UYVYToI420(const uint8* src_uyvy, // Convert ARGB to I420. 
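The converters below all follow one dispatch idiom: start from the portable _C row function, upgrade to the _Any_ SIMD variant (which handles a scalar tail) when the CPU feature is present, and upgrade again to the exact-width kernel when the row width is a multiple of the vector width. Abstracted from the bodies in this file (the feature check and width of 16 follow the SSSE3 cases seen here; the exact numbers are per-kernel):

void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
    ARGBToYRow_C;                      // portable fallback
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
  ARGBToYRow = ARGBToYRow_Any_SSSE3;   // any width; scalar tail inside
  if (IS_ALIGNED(width, 16)) {
    ARGBToYRow = ARGBToYRow_SSSE3;     // full-vector rows only
  }
}
#endif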
LIBYUV_API -int ARGBToI420(const uint8* src_argb, +int ARGBToI420(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -695,20 +698,21 @@ int ARGBToI420(const uint8* src_argb, // Convert BGRA to I420. LIBYUV_API -int BGRAToI420(const uint8* src_bgra, +int BGRAToI420(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, uint8* dst_u, - uint8* dst_v, int width) = BGRAToUVRow_C; - void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int width) = + void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra, + uint8_t* dst_u, uint8_t* dst_v, int width) = + BGRAToUVRow_C; + void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) = BGRAToYRow_C; if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -780,20 +784,21 @@ int BGRAToI420(const uint8* src_bgra, // Convert ABGR to I420. LIBYUV_API -int ABGRToI420(const uint8* src_abgr, +int ABGRToI420(const uint8_t* src_abgr, int src_stride_abgr, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, uint8* dst_u, - uint8* dst_v, int width) = ABGRToUVRow_C; - void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int width) = + void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ABGRToUVRow_C; + void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = ABGRToYRow_C; if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -865,20 +870,21 @@ int ABGRToI420(const uint8* src_abgr, // Convert RGBA to I420. 
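Throughout these converters the chroma plane dimensions round up, so odd-sized images keep their last chroma column and row:

const int halfwidth = (width + 1) >> 1;    // width 5  -> 3 chroma samples
const int halfheight = (height + 1) >> 1;  // height 7 -> 4 chroma rows

The same (x + 1) >> 1 rounding appears in the PSNR code earlier in this diff (width_uv, height_uv).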
LIBYUV_API -int RGBAToI420(const uint8* src_rgba, +int RGBAToI420(const uint8_t* src_rgba, int src_stride_rgba, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, uint8* dst_u, - uint8* dst_v, int width) = RGBAToUVRow_C; - void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int width) = + void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGBAToUVRow_C; + void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) = RGBAToYRow_C; if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -950,28 +956,30 @@ int RGBAToI420(const uint8* src_rgba, // Convert RGB24 to I420. LIBYUV_API -int RGB24ToI420(const uint8* src_rgb24, +int RGB24ToI420(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) - void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C; - void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int width) = + void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGB24ToUVRow_C; + void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = RGB24ToYRow_C; #else - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { @@ -1080,28 +1088,29 @@ int RGB24ToI420(const uint8* src_rgb24, // Convert RAW to I420. 
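RGB24ToI420 below is built two ways: NEON and MSA have dedicated RGB24ToYRow/RGB24ToUVRow kernels, while other targets convert each pair of RGB24 rows into a temporary ARGB buffer and reuse the ARGB kernels. Roughly, the fallback looks like this (the kRowSize sizing is illustrative; align_buffer_64/free_aligned_buffer_64 are libyuv's aligned-buffer macros, visible elsewhere in this diff):

// On targets without direct RGB24 row kernels:
const int kRowSize = (width * 4 + 31) & ~31;  // one ARGB row, 32-byte aligned
align_buffer_64(row, kRowSize * 2);           // space for two rows
for (y = 0; y < height - 1; y += 2) {
  RGB24ToARGBRow(src_rgb24, row, width);
  RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
  ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
  ARGBToYRow(row, dst_y, width);
  ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
  src_rgb24 += src_stride_rgb24 * 2;
  dst_y += dst_stride_y * 2;
  dst_u += dst_stride_u;
  dst_v += dst_stride_v;
}
free_aligned_buffer_64(row);

The cost is one extra pass over the pixels, which is why the direct kernels exist on NEON and MSA.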
LIBYUV_API -int RAWToI420(const uint8* src_raw, +int RAWToI420(const uint8_t* src_raw, int src_stride_raw, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) - void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw, uint8* dst_u, - uint8* dst_v, int width) = RAWToUVRow_C; - void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int width) = + void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, + uint8_t* dst_v, int width) = RAWToUVRow_C; + void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYRow_C; #else - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { @@ -1210,29 +1219,30 @@ int RAWToI420(const uint8* src_raw, // Convert RGB565 to I420. LIBYUV_API -int RGB565ToI420(const uint8* src_rgb565, +int RGB565ToI420(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) - void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) = + void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, + uint8_t* dst_u, uint8_t* dst_v, int width) = RGB565ToUVRow_C; - void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int width) = + void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) = RGB565ToYRow_C; #else - void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = - RGB565ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = RGB565ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { @@ -1347,29 +1357,30 @@ int RGB565ToI420(const uint8* src_rgb565, // Convert ARGB1555 to I420. 
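RGB565ToI420 below unpacks 5- and 6-bit fields to 8 bits by replicating each field's top bits into the freed low bits, which maps the field's range onto 0..255 exactly at both ends. A per-pixel sketch (helper name illustrative; libyuv does this inside its RGB565ToARGBRow kernels):

#include <stdint.h>

static void RGB565ToBGR888(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r) {
  uint8_t b5 = p & 0x1f;          // bits 0..4
  uint8_t g6 = (p >> 5) & 0x3f;   // bits 5..10
  uint8_t r5 = (p >> 11) & 0x1f;  // bits 11..15
  *b = (uint8_t)((b5 << 3) | (b5 >> 2));  // 0x1f -> 0xff, 0x00 -> 0x00
  *g = (uint8_t)((g6 << 2) | (g6 >> 4));
  *r = (uint8_t)((r5 << 3) | (r5 >> 2));
}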
LIBYUV_API -int ARGB1555ToI420(const uint8* src_argb1555, +int ARGB1555ToI420(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) - void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) = + void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, + uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB1555ToUVRow_C; - void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int width) = - ARGB1555ToYRow_C; + void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y, + int width) = ARGB1555ToYRow_C; #else - void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = - ARGB1555ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = ARGB1555ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 || @@ -1488,29 +1499,30 @@ int ARGB1555ToI420(const uint8* src_argb1555, // Convert ARGB4444 to I420. LIBYUV_API -int ARGB4444ToI420(const uint8* src_argb4444, +int ARGB4444ToI420(const uint8_t* src_argb4444, int src_stride_argb4444, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if defined(HAS_ARGB4444TOYROW_NEON) - void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) = + void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, + uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB4444ToUVRow_C; - void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int width) = - ARGB4444ToYRow_C; + void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y, + int width) = ARGB4444ToYRow_C; #else - void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = - ARGB4444ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = ARGB4444ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_argb4444 || !dst_y || !dst_u || !dst_v || width <= 0 || @@ -1639,9 +1651,9 @@ int ARGB4444ToI420(const uint8* src_argb4444, return 0; } -static void SplitPixels(const uint8* src_u, +static void SplitPixels(const uint8_t* src_u, int src_pixel_stride_uv, - uint8* dst_u, + uint8_t* dst_u, int width) { int i; for (i = 0; i < width; ++i) { @@ -1653,18 +1665,18 @@ static void SplitPixels(const uint8* src_u, // 
Convert Android420 to I420. LIBYUV_API -int Android420ToI420(const uint8* src_y, +int Android420ToI420(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -1697,14 +1709,15 @@ int Android420ToI420(const uint8* src_y, CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; // Split UV planes - NV21 - } else if (src_pixel_stride_uv == 2 && vu_off == -1 && - src_stride_u == src_stride_v) { + } + if (src_pixel_stride_uv == 2 && vu_off == -1 && + src_stride_u == src_stride_v) { SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u, halfwidth, halfheight); return 0; // Split UV planes - NV12 - } else if (src_pixel_stride_uv == 2 && vu_off == 1 && - src_stride_u == src_stride_v) { + } + if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) { SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v, halfwidth, halfheight); return 0; diff --git a/chromium/third_party/libyuv/source/convert_argb.cc b/chromium/third_party/libyuv/source/convert_argb.cc index 9b93fc15194..e084f680680 100644 --- a/chromium/third_party/libyuv/source/convert_argb.cc +++ b/chromium/third_party/libyuv/source/convert_argb.cc @@ -26,9 +26,9 @@ extern "C" { // Copy ARGB with optional flipping LIBYUV_API -int ARGBCopy(const uint8* src_argb, +int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -48,20 +48,20 @@ int ARGBCopy(const uint8* src_argb, } // Convert I420 to ARGB with matrix -static int I420ToARGBMatrix(const uint8* src_y, +static int I420ToARGBMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -120,13 +120,13 @@ static int I420ToARGBMatrix(const uint8* src_y, // Convert I420 to ARGB. LIBYUV_API -int I420ToARGB(const uint8* src_y, +int I420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -137,13 +137,13 @@ int I420ToARGB(const uint8* src_y, // Convert I420 to ABGR. LIBYUV_API -int I420ToABGR(const uint8* src_y, +int I420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -156,13 +156,13 @@ int I420ToABGR(const uint8* src_y, // Convert J420 to ARGB. 
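Android420ToI420 just above handles Android's flexible YUV_420_888 layout by inspecting the chroma pixel stride and the distance between the U and V pointers (vu_off = src_v - src_u). The dispatch it performs, in outline:

if (src_pixel_stride_uv == 1) {
  // Already planar I420: CopyPlane per plane.
} else if (src_pixel_stride_uv == 2 && vu_off == -1 &&
           src_stride_u == src_stride_v) {
  // NV21: one interleaved plane, V first; SplitUVPlane with outputs swapped.
} else if (src_pixel_stride_uv == 2 && vu_off == 1 &&
           src_stride_u == src_stride_v) {
  // NV12: one interleaved plane, U first; SplitUVPlane.
} else {
  // General case: SplitPixels gathers every src_pixel_stride_uv-th byte,
  // row by row.
}

The SplitPixels fallback is a scalar pass, so hitting one of the three fast cases matters on real devices.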
LIBYUV_API -int J420ToARGB(const uint8* src_y, +int J420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -173,13 +173,13 @@ int J420ToARGB(const uint8* src_y, // Convert J420 to ABGR. LIBYUV_API -int J420ToABGR(const uint8* src_y, +int J420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -192,13 +192,13 @@ int J420ToABGR(const uint8* src_y, // Convert H420 to ARGB. LIBYUV_API -int H420ToARGB(const uint8* src_y, +int H420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -209,13 +209,13 @@ int H420ToARGB(const uint8* src_y, // Convert H420 to ABGR. LIBYUV_API -int H420ToABGR(const uint8* src_y, +int H420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -227,20 +227,20 @@ int H420ToABGR(const uint8* src_y, } // Convert I422 to ARGB with matrix -static int I422ToARGBMatrix(const uint8* src_y, +static int I422ToARGBMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -304,13 +304,13 @@ static int I422ToARGBMatrix(const uint8* src_y, // Convert I422 to ARGB. LIBYUV_API -int I422ToARGB(const uint8* src_y, +int I422ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -321,13 +321,13 @@ int I422ToARGB(const uint8* src_y, // Convert I422 to ABGR. LIBYUV_API -int I422ToABGR(const uint8* src_y, +int I422ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -340,13 +340,13 @@ int I422ToABGR(const uint8* src_y, // Convert J422 to ARGB. 
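A naming key for this wrapper family, as the constant names in this diff indicate: the I prefix selects BT.601 limited-range coefficients (kYuvI601Constants), H selects BT.709 (kYuvH709Constants), and J selects full-range JPEG coefficients (by the same pattern, kYuvJPEGConstants; that name does not appear in this hunk, so treat it as inferred). Each ABGR wrapper is the matching ARGB path with the chroma planes exchanged and a mirrored kYvu* constant set swapped in, e.g.:

// ABGR output = ARGB path with U/V exchanged and mirrored constants.
return I420ToARGBMatrix(src_y, src_stride_y,
                        src_v, src_stride_v,  // V where U is expected
                        src_u, src_stride_u,  // U where V is expected
                        dst_abgr, dst_stride_abgr,
                        &kYvuI601Constants,   // mirrored coefficient set
                        width, height);

The I010ToAB30/H010ToAB30 additions later in this diff spell the same trick out for the 10-bit path.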
LIBYUV_API -int J422ToARGB(const uint8* src_y, +int J422ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -357,13 +357,13 @@ int J422ToARGB(const uint8* src_y, // Convert J422 to ABGR. LIBYUV_API -int J422ToABGR(const uint8* src_y, +int J422ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -376,13 +376,13 @@ int J422ToABGR(const uint8* src_y, // Convert H422 to ARGB. LIBYUV_API -int H422ToARGB(const uint8* src_y, +int H422ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -393,13 +393,13 @@ int H422ToARGB(const uint8* src_y, // Convert H422 to ABGR. LIBYUV_API -int H422ToABGR(const uint8* src_y, +int H422ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -413,24 +413,22 @@ int H422ToABGR(const uint8* src_y, // Convert 10 bit YUV to ARGB with matrix // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // multiply 10 bit yuv into high bits to allow any number of bits. -static int H010ToAR30Matrix(const uint16* src_y, +static int I010ToAR30Matrix(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf, - const uint16* v_buf, uint8* rgb_buf, + void (*I210ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = - I210ToARGBRow_C; - void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToAR30Row_C; + I210ToAR30Row_C; if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -440,84 +438,117 @@ static int H010ToAR30Matrix(const uint16* src_y, dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } -#if defined(HAS_I210TOARGBROW_SSSE3) +#if defined(HAS_I210TOAR30ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - I210ToARGBRow = I210ToARGBRow_Any_SSSE3; + I210ToAR30Row = I210ToAR30Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - I210ToARGBRow = I210ToARGBRow_SSSE3; + I210ToAR30Row = I210ToAR30Row_SSSE3; } } #endif -#if defined(HAS_ARGBTOAR30ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAR30Row = ARGBToAR30Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOAR30ROW_AVX2) +#if defined(HAS_I210TOAR30ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAR30Row = ARGBToAR30Row_AVX2; + I210ToAR30Row = 
I210ToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I210ToAR30Row = I210ToAR30Row_AVX2; } } #endif - - { - // Row buffers for 8 bit YUV and RGB. - align_buffer_64(row_argb, width * 4); - - for (y = 0; y < height; ++y) { - I210ToARGBRow(src_y, src_u, src_v, row_argb, yuvconstants, width); - ARGBToAR30Row(row_argb, dst_ar30, width); - dst_ar30 += dst_stride_ar30; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } + for (y = 0; y < height; ++y) { + I210ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; } - - free_aligned_buffer_64(row_argb); } - return 0; } +// Convert I010 to AR30. +LIBYUV_API +int I010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYuvI601Constants, width, height); +} + // Convert H010 to AR30. LIBYUV_API -int H010ToAR30(const uint16* src_y, +int H010ToAR30(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { - return H010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, &kYuvH709Constants, width, height); } +// Convert I010 to AB30. +LIBYUV_API +int I010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height) { + return I010ToAR30Matrix(src_y, src_stride_y, src_v, + src_stride_v, src_u, src_stride_u, dst_ab30, dst_stride_ab30, + &kYvuI601Constants, width, height); +} + +// Convert H010 to AB30. 
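Two things are worth noting in the I010ToAR30Matrix rewrite above. First, the old path rendered 8-bit ARGB rows into a temporary buffer and repacked them to AR30, which both touched memory twice and quantized the 10-bit samples to 8 bits before widening them again; the direct I210ToAR30Row kernels keep the full 10 bits. The precision point in three lines:

uint16_t v10 = 513;                   // a 10-bit sample
uint8_t v8 = (uint8_t)(v10 >> 2);     // via the 8-bit intermediate: 128
uint16_t back = (uint16_t)(v8 << 2);  // widened again: 512 != 513

Second, I010ToAB30 just above gets the ABGR channel order for free by passing V where U is expected and using the mirrored kYvu* constants.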
+LIBYUV_API +int H010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height) { + return I010ToAR30Matrix(src_y, src_stride_y, src_v, + src_stride_v, src_u, src_stride_u, dst_ab30, dst_stride_ab30, + &kYvuH709Constants, width, height); +} + // Convert 10 bit YUV to ARGB with matrix -static int I010ToARGBMatrix(const uint16* src_y, +static int I010ToARGBMatrix(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf, - const uint16* v_buf, uint8* rgb_buf, + void (*I210ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -537,7 +568,14 @@ static int I010ToARGBMatrix(const uint16* src_y, } } #endif - +#if defined(HAS_I210TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I210ToARGBRow = I210ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I210ToARGBRow = I210ToARGBRow_AVX2; + } + } +#endif for (y = 0; y < height; ++y) { I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; @@ -552,13 +590,13 @@ static int I010ToARGBMatrix(const uint16* src_y, // Convert I010 to ARGB. LIBYUV_API -int I010ToARGB(const uint16* src_y, +int I010ToARGB(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -569,13 +607,13 @@ int I010ToARGB(const uint16* src_y, // Convert I010 to ABGR. LIBYUV_API -int I010ToABGR(const uint16* src_y, +int I010ToABGR(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -588,13 +626,13 @@ int I010ToABGR(const uint16* src_y, // Convert H010 to ARGB. LIBYUV_API -int H010ToARGB(const uint16* src_y, +int H010ToARGB(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -605,13 +643,13 @@ int H010ToARGB(const uint16* src_y, // Convert H010 to ABGR. 
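The I/H prefixes in these wrappers select colorimetry, not layout: I010 pairs with the BT.601 constants and H010 with BT.709, and the function bodies are otherwise identical. A hedged sketch of dispatching on that choice using the public entry points added above (TenBitToAR30 is an illustrative helper):

#include <stdint.h>
#include "libyuv/convert_argb.h"

// Illustrative only: picks the BT.709 or BT.601 10-bit-to-AR30 conversion.
static int TenBitToAR30(const uint16_t* y, int ys, const uint16_t* u, int us,
                        const uint16_t* v, int vs, uint8_t* dst_ar30, int ds,
                        int width, int height, int is_bt709) {
  return is_bt709 ? H010ToAR30(y, ys, u, us, v, vs, dst_ar30, ds, width, height)
                  : I010ToAR30(y, ys, u, us, v, vs, dst_ar30, ds, width, height);
}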
LIBYUV_API -int H010ToABGR(const uint16* src_y, +int H010ToABGR(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -623,20 +661,20 @@ int H010ToABGR(const uint16* src_y, } // Convert I444 to ARGB with matrix -static int I444ToARGBMatrix(const uint8* src_y, +static int I444ToARGBMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I444ToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -700,13 +738,13 @@ static int I444ToARGBMatrix(const uint8* src_y, // Convert I444 to ARGB. LIBYUV_API -int I444ToARGB(const uint8* src_y, +int I444ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -717,13 +755,13 @@ int I444ToARGB(const uint8* src_y, // Convert I444 to ABGR. LIBYUV_API -int I444ToABGR(const uint8* src_y, +int I444ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -736,13 +774,13 @@ int I444ToABGR(const uint8* src_y, // Convert J444 to ARGB. LIBYUV_API -int J444ToARGB(const uint8* src_y, +int J444ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -752,28 +790,28 @@ int J444ToARGB(const uint8* src_y, } // Convert I420 with Alpha to preattenuated ARGB. 
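"Preattenuated" here means alpha-premultiplied: when the attenuate flag is set, each color channel is scaled by the pixel's alpha after conversion. A scalar sketch of that step (rounding in libyuv's actual ARGBAttenuateRow kernels may differ slightly; this version truncates):

#include <stdint.h>

// Premultiplies one ARGB pixel, stored as B,G,R,A bytes, in place.
static void AttenuatePixel(uint8_t bgra[4]) {
  uint32_t a = bgra[3];
  bgra[0] = (uint8_t)((bgra[0] * a) >> 8);  // B
  bgra[1] = (uint8_t)((bgra[1] * a) >> 8);  // G
  bgra[2] = (uint8_t)((bgra[2] * a) >> 8);  // R
}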
-static int I420AlphaToARGBMatrix(const uint8* src_y, +static int I420AlphaToARGBMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - const uint8* src_a, + const uint8_t* src_a, int src_stride_a, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height, int attenuate) { int y; - void (*I422AlphaToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, const uint8* a_buf, - uint8* dst_argb, + void (*I422AlphaToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = I422AlphaToARGBRow_C; - void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) = - ARGBAttenuateRow_C; + void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBAttenuateRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -867,15 +905,15 @@ static int I420AlphaToARGBMatrix(const uint8* src_y, // Convert I420 with Alpha to ARGB. LIBYUV_API -int I420AlphaToARGB(const uint8* src_y, +int I420AlphaToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - const uint8* src_a, + const uint8_t* src_a, int src_stride_a, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, @@ -888,15 +926,15 @@ int I420AlphaToARGB(const uint8* src_y, // Convert I420 with Alpha to ABGR. LIBYUV_API -int I420AlphaToABGR(const uint8* src_y, +int I420AlphaToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - const uint8* src_a, + const uint8_t* src_a, int src_stride_a, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height, @@ -910,14 +948,14 @@ int I420AlphaToABGR(const uint8* src_y, // Convert I400 to ARGB. LIBYUV_API -int I400ToARGB(const uint8* src_y, +int I400ToARGB(const uint8_t* src_y, int src_stride_y, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*I400ToARGBRow)(const uint8* y_buf, uint8* rgb_buf, int width) = + void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, int width) = I400ToARGBRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; @@ -977,14 +1015,14 @@ int I400ToARGB(const uint8* src_y, // Convert J400 to ARGB. LIBYUV_API -int J400ToARGB(const uint8* src_y, +int J400ToARGB(const uint8_t* src_y, int src_stride_y, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) = + void (*J400ToARGBRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) = J400ToARGBRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1055,74 +1093,74 @@ static const uvec8 kShuffleMaskRGBAToARGB = { // Convert BGRA to ARGB. 
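All of the BGRA/ABGR/RGBA-to-ARGB wrappers that follow reduce to ARGBShuffle with one of the 4-byte masks above. A scalar model of what such a mask does (ShufflePixels is illustrative, not the SIMD implementation):

#include <stdint.h>

// Output byte j of each pixel is taken from input byte mask[j]; e.g. the
// BGRA mask begins {3, 2, 1, 0, ...}, reversing the byte order of each pixel.
static void ShufflePixels(const uint8_t* src, uint8_t* dst,
                          const uint8_t* mask, int num_pixels) {
  int i, j;
  for (i = 0; i < num_pixels; ++i) {
    for (j = 0; j < 4; ++j) {
      dst[i * 4 + j] = src[i * 4 + mask[j]];
    }
  }
}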
LIBYUV_API -int BGRAToARGB(const uint8* src_bgra, +int BGRAToARGB(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskBGRAToARGB), width, height); + (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height); } // Convert ARGB to BGRA (same as BGRAToARGB). LIBYUV_API -int ARGBToBGRA(const uint8* src_bgra, +int ARGBToBGRA(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskBGRAToARGB), width, height); + (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height); } // Convert ABGR to ARGB. LIBYUV_API -int ABGRToARGB(const uint8* src_abgr, +int ABGRToARGB(const uint8_t* src_abgr, int src_stride_abgr, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskABGRToARGB), width, height); + (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height); } // Convert ARGB to ABGR (same as ABGRToARGB). LIBYUV_API -int ARGBToABGR(const uint8* src_abgr, +int ARGBToABGR(const uint8_t* src_abgr, int src_stride_abgr, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskABGRToARGB), width, height); + (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height); } // Convert RGBA to ARGB. LIBYUV_API -int RGBAToARGB(const uint8* src_rgba, +int RGBAToARGB(const uint8_t* src_rgba, int src_stride_rgba, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskRGBAToARGB), width, height); + (const uint8_t*)(&kShuffleMaskRGBAToARGB), width, height); } // Convert RGB24 to ARGB. LIBYUV_API -int RGB24ToARGB(const uint8* src_rgb24, +int RGB24ToARGB(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1174,14 +1212,14 @@ int RGB24ToARGB(const uint8* src_rgb24, // Convert RAW to ARGB. LIBYUV_API -int RAWToARGB(const uint8* src_raw, +int RAWToARGB(const uint8_t* src_raw, int src_stride_raw, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; if (!src_raw || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1233,15 +1271,15 @@ int RAWToARGB(const uint8* src_raw, // Convert RGB565 to ARGB. 
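Widening 565 back to 888 replicates the high bits of each channel into the new low bits so that full scale maps exactly to 255, the same trick the C row kernels use. A standalone sketch:

#include <stdint.h>

// Expands one RGB565 pixel: 5-bit 31 -> (31 << 3) | (31 >> 2) = 255,
// 6-bit 63 -> (63 << 2) | (63 >> 4) = 255.
static void Unpack565(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r) {
  uint16_t b5 = p & 0x1f, g6 = (p >> 5) & 0x3f, r5 = (p >> 11) & 0x1f;
  *b = (uint8_t)((b5 << 3) | (b5 >> 2));
  *g = (uint8_t)((g6 << 2) | (g6 >> 4));
  *r = (uint8_t)((r5 << 3) | (r5 >> 2));
}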
LIBYUV_API -int RGB565ToARGB(const uint8* src_rgb565, +int RGB565ToARGB(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) = - RGB565ToARGBRow_C; + void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb, + int width) = RGB565ToARGBRow_C; if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1300,14 +1338,14 @@ int RGB565ToARGB(const uint8* src_rgb565, // Convert ARGB1555 to ARGB. LIBYUV_API -int ARGB1555ToARGB(const uint8* src_argb1555, +int ARGB1555ToARGB(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, + void (*ARGB1555ToARGBRow)(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) = ARGB1555ToARGBRow_C; if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1367,14 +1405,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555, // Convert ARGB4444 to ARGB. LIBYUV_API -int ARGB4444ToARGB(const uint8* src_argb4444, +int ARGB4444ToARGB(const uint8_t* src_argb4444, int src_stride_argb4444, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, + void (*ARGB4444ToARGBRow)(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) = ARGB4444ToARGBRow_C; if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1434,15 +1472,13 @@ int ARGB4444ToARGB(const uint8* src_argb4444, // Convert AR30 to ARGB. LIBYUV_API -int AR30ToARGB(const uint8* src_ar30, +int AR30ToARGB(const uint8_t* src_ar30, int src_stride_ar30, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*AR30ToARGBRow)(const uint8* src_ar30, uint8* dst_argb, int width) = - AR30ToARGBRow_C; if (!src_ar30 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1459,27 +1495,59 @@ int AR30ToARGB(const uint8* src_ar30, src_stride_ar30 = dst_stride_argb = 0; } for (y = 0; y < height; ++y) { - AR30ToARGBRow(src_ar30, dst_argb, width); + AR30ToARGBRow_C(src_ar30, dst_argb, width); src_ar30 += src_stride_ar30; dst_argb += dst_stride_argb; } return 0; } +// Convert AR30 to ABGR. +LIBYUV_API +int AR30ToABGR(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + int y; + if (!src_ar30 || !dst_abgr || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; + src_stride_ar30 = -src_stride_ar30; + } + // Coalesce rows. 
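// Coalescing, as used throughout this file: when both strides equal the row
// width in bytes, the rows are contiguous in memory, so the image can be
// processed as a single row of width * height pixels with the strides zeroed.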
+ if (src_stride_ar30 == width * 4 && dst_stride_abgr == width * 4) { + width *= height; + height = 1; + src_stride_ar30 = dst_stride_abgr = 0; + } + for (y = 0; y < height; ++y) { + AR30ToABGRRow_C(src_ar30, dst_abgr, width); + src_ar30 += src_stride_ar30; + dst_abgr += dst_stride_abgr; + } + return 0; +} + // Convert NV12 to ARGB with matrix -static int NV12ToARGBMatrix(const uint8* src_y, +static int NV12ToARGBMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - NV12ToARGBRow_C; + void (*NV12ToARGBRow)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1534,20 +1602,20 @@ static int NV12ToARGBMatrix(const uint8* src_y, } // Convert NV21 to ARGB with matrix -static int NV21ToARGBMatrix(const uint8* src_y, +static int NV21ToARGBMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, - int src_stride_uv, - uint8* dst_argb, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*NV21ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - NV21ToARGBRow_C; - if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { + void (*NV21ToARGBRow)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; + if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1590,11 +1658,11 @@ static int NV21ToARGBMatrix(const uint8* src_y, #endif for (y = 0; y < height; ++y) { - NV21ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width); + NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { - src_uv += src_stride_uv; + src_vu += src_stride_vu; } } return 0; @@ -1602,11 +1670,11 @@ static int NV21ToARGBMatrix(const uint8* src_y, // Convert NV12 to ARGB. LIBYUV_API -int NV12ToARGB(const uint8* src_y, +int NV12ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -1616,26 +1684,26 @@ int NV12ToARGB(const uint8* src_y, // Convert NV21 to ARGB. LIBYUV_API -int NV21ToARGB(const uint8* src_y, +int NV21ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, - int src_stride_uv, - uint8* dst_argb, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { - return NV21ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, + return NV21ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert NV12 to ABGR. // To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix. 
// To swap the UV use NV12 instead of NV21. LIBYUV_API -int NV12ToABGR(const uint8* src_y, +int NV12ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -1645,11 +1713,11 @@ int NV12ToABGR(const uint8* src_y, // Convert NV21 to ABGR. LIBYUV_API -int NV21ToABGR(const uint8* src_y, +int NV21ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_vu, + const uint8_t* src_vu, int src_stride_vu, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { @@ -1659,16 +1727,16 @@ int NV21ToABGR(const uint8* src_y, // Convert M420 to ARGB. LIBYUV_API -int M420ToARGB(const uint8* src_m420, +int M420ToARGB(const uint8_t* src_m420, int src_stride_m420, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - NV12ToARGBRow_C; + void (*NV12ToARGBRow)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_m420 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1728,14 +1796,14 @@ int M420ToARGB(const uint8* src_m420, // Convert YUY2 to ARGB. LIBYUV_API -int YUY2ToARGB(const uint8* src_yuy2, +int YUY2ToARGB(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, + void (*YUY2ToARGBRow)(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = YUY2ToARGBRow_C; if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) { @@ -1795,14 +1863,14 @@ int YUY2ToARGB(const uint8* src_yuy2, // Convert UYVY to ARGB. LIBYUV_API -int UYVYToARGB(const uint8* src_uyvy, +int UYVYToARGB(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, + void (*UYVYToARGBRow)(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = UYVYToARGBRow_C; if (!src_uyvy || !dst_argb || width <= 0 || height == 0) { @@ -1859,10 +1927,10 @@ int UYVYToARGB(const uint8* src_uyvy, } return 0; } -static void WeavePixels(const uint8* src_u, - const uint8* src_v, +static void WeavePixels(const uint8_t* src_u, + const uint8_t* src_v, int src_pixel_stride_uv, - uint8* dst_uv, + uint8_t* dst_uv, int width) { int i; for (i = 0; i < width; ++i) { @@ -1876,20 +1944,20 @@ static void WeavePixels(const uint8* src_u, // Convert Android420 to ARGB. 
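Android420ToARGBMatrix below recognizes the common YUV_420_888 plane arrangements from the chroma pixel stride and the distance between the U and V base pointers, and only falls back to weaving the chroma samples into a temporary plane for exotic strides. A sketch of that classification (ClassifyAndroid420 is an illustrative helper; the real code also requires src_stride_u == src_stride_v for the NV cases):

#include <stddef.h>
#include <stdint.h>

typedef enum { kLayoutI420, kLayoutNV12, kLayoutNV21, kLayoutGeneric } Layout420;

static Layout420 ClassifyAndroid420(const uint8_t* src_u, const uint8_t* src_v,
                                    int src_pixel_stride_uv) {
  ptrdiff_t vu_off = src_v - src_u;
  if (src_pixel_stride_uv == 1) return kLayoutI420;                  // planar
  if (src_pixel_stride_uv == 2 && vu_off == 1) return kLayoutNV12;   // U,V pairs
  if (src_pixel_stride_uv == 2 && vu_off == -1) return kLayoutNV21;  // V,U pairs
  return kLayoutGeneric;  // weave U and V into a temporary interleaved plane
}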
LIBYUV_API -int Android420ToARGBMatrix(const uint8* src_y, +int Android420ToARGBMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; - uint8* dst_uv; + uint8_t* dst_uv; const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; @@ -1910,13 +1978,14 @@ int Android420ToARGBMatrix(const uint8* src_y, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); // NV21 - } else if (src_pixel_stride_uv == 2 && vu_off == -1 && - src_stride_u == src_stride_v) { + } + if (src_pixel_stride_uv == 2 && vu_off == -1 && + src_stride_u == src_stride_v) { return NV21ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); // NV12 - } else if (src_pixel_stride_uv == 2 && vu_off == 1 && - src_stride_u == src_stride_v) { + } + if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) { return NV12ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, dst_argb, dst_stride_argb, yuvconstants, width, height); } @@ -1938,14 +2007,14 @@ int Android420ToARGBMatrix(const uint8* src_y, // Convert Android420 to ARGB. LIBYUV_API -int Android420ToARGB(const uint8* src_y, +int Android420ToARGB(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { @@ -1957,14 +2026,14 @@ int Android420ToARGB(const uint8* src_y, // Convert Android420 to ABGR. LIBYUV_API -int Android420ToABGR(const uint8* src_y, +int Android420ToABGR(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, - uint8* dst_abgr, + uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { diff --git a/chromium/third_party/libyuv/source/convert_from.cc b/chromium/third_party/libyuv/source/convert_from.cc index 9da607102f6..b5587ced625 100644 --- a/chromium/third_party/libyuv/source/convert_from.cc +++ b/chromium/third_party/libyuv/source/convert_from.cc @@ -30,17 +30,17 @@ static __inline int Abs(int v) { } // I420 To any I4xx YUV format with mirroring. -static int I420ToI4xx(const uint8* src_y, +static int I420ToI4xx(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int src_y_width, int src_y_height, @@ -67,17 +67,17 @@ static int I420ToI4xx(const uint8* src_y, // Convert 8 bit YUV to 10 bit. 
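One standard way to widen 8-bit samples into the 10-bit range I420ToI010 produces, mapping 0 to 0 and 255 to 1023, is bit replication; a hedged sketch (libyuv's actual kernel may use an equivalent scale multiply instead):

#include <stdint.h>

// Replicates the two top bits into the two new low bits:
// 255 -> (255 << 2) | (255 >> 6) = 1020 | 3 = 1023.
static uint16_t Expand8To10(uint8_t v) {
  return (uint16_t)(((uint16_t)v << 2) | (v >> 6));
}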
LIBYUV_API -int I420ToI010(const uint8* src_y, +int I420ToI010(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, - uint16* dst_u, + uint16_t* dst_u, int dst_stride_u, - uint16* dst_v, + uint16_t* dst_v, int dst_stride_v, int width, int height) { @@ -112,17 +112,17 @@ int I420ToI010(const uint8* src_y, // 420 chroma is 1/2 width, 1/2 height // 422 chroma is 1/2 width, 1x height LIBYUV_API -int I420ToI422(const uint8* src_y, +int I420ToI422(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -137,17 +137,17 @@ int I420ToI422(const uint8* src_y, // 420 chroma is 1/2 width, 1/2 height // 444 chroma is 1x width, 1x height LIBYUV_API -int I420ToI444(const uint8* src_y, +int I420ToI444(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -161,9 +161,9 @@ int I420ToI444(const uint8* src_y, // Copy to I400. Source can be I420,422,444,400,NV12,NV21 LIBYUV_API -int I400Copy(const uint8* src_y, +int I400Copy(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { @@ -181,19 +181,19 @@ int I400Copy(const uint8* src_y, } LIBYUV_API -int I422ToYUY2(const uint8* src_y, +int I422ToYUY2(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_yuy2, + uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height) { int y; - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = + void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; @@ -219,6 +219,14 @@ int I422ToYUY2(const uint8* src_y, } } #endif +#if defined(HAS_I422TOYUY2ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_AVX2; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; @@ -239,19 +247,19 @@ int I422ToYUY2(const uint8* src_y, } LIBYUV_API -int I420ToYUY2(const uint8* src_y, +int I420ToYUY2(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_yuy2, + uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height) { int y; - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = + void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; if (!src_y || 
!src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; @@ -270,6 +278,14 @@ int I420ToYUY2(const uint8* src_y, } } #endif +#if defined(HAS_I422TOYUY2ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_AVX2; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; @@ -303,19 +319,19 @@ int I420ToYUY2(const uint8* src_y, } LIBYUV_API -int I422ToUYVY(const uint8* src_y, +int I422ToUYVY(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_uyvy, + uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height) { int y; - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = + void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; @@ -341,6 +357,14 @@ int I422ToUYVY(const uint8* src_y, } } #endif +#if defined(HAS_I422TOUYVYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_AVX2; + } + } +#endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; @@ -369,19 +393,19 @@ int I422ToUYVY(const uint8* src_y, } LIBYUV_API -int I420ToUYVY(const uint8* src_y, +int I420ToUYVY(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_uyvy, + uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height) { int y; - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = + void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; @@ -400,6 +424,14 @@ int I420ToUYVY(const uint8* src_y, } } #endif +#if defined(HAS_I422TOUYVYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_AVX2; + } + } +#endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; @@ -434,15 +466,15 @@ int I420ToUYVY(const uint8* src_y, // TODO(fbarchard): test negative height for invert. 
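The negative-height convention this TODO refers to appears in every converter here: a negative height requests a vertical flip, implemented by starting at the last row and negating the stride. A self-contained sketch for a single plane (NormalizePlane is illustrative; the converters inline this logic):

#include <stddef.h>
#include <stdint.h>

static void NormalizePlane(const uint8_t** src, int* stride, int* height) {
  if (*height < 0) {
    *height = -*height;
    *src += (ptrdiff_t)(*height - 1) * *stride;
    *stride = -*stride;
  }
}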
LIBYUV_API -int I420ToNV12(const uint8* src_y, +int I420ToNV12(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height) { @@ -461,15 +493,15 @@ int I420ToNV12(const uint8* src_y, } LIBYUV_API -int I420ToNV21(const uint8* src_y, +int I420ToNV21(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_vu, + uint8_t* dst_vu, int dst_stride_vu, int width, int height) { @@ -479,20 +511,20 @@ int I420ToNV21(const uint8* src_y, } // Convert I420 to RGBA with matrix -static int I420ToRGBAMatrix(const uint8* src_y, +static int I420ToRGBAMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { @@ -551,13 +583,13 @@ static int I420ToRGBAMatrix(const uint8* src_y, // Convert I420 to RGBA. LIBYUV_API -int I420ToRGBA(const uint8* src_y, +int I420ToRGBA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, int width, int height) { @@ -568,13 +600,13 @@ int I420ToRGBA(const uint8* src_y, // Convert I420 to BGRA. LIBYUV_API -int I420ToBGRA(const uint8* src_y, +int I420ToBGRA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_bgra, + uint8_t* dst_bgra, int dst_stride_bgra, int width, int height) { @@ -586,20 +618,20 @@ int I420ToBGRA(const uint8* src_y, } // Convert I420 to RGB24 with matrix -static int I420ToRGB24Matrix(const uint8* src_y, +static int I420ToRGB24Matrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgb24, + uint8_t* dst_rgb24, int dst_stride_rgb24, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I422ToRGB24Row)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { @@ -658,13 +690,13 @@ static int I420ToRGB24Matrix(const uint8* src_y, // Convert I420 to RGB24. 
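A note on naming for the next few converters: libyuv's RGB24 is three bytes per pixel in B,G,R memory order, and RAW is the same three bytes in R,G,B order, so converting between them is a per-pixel byte reversal. An illustrative helper (not libyuv API):

#include <stdint.h>

// Reverses the channel order of one 3-byte pixel (RGB24 <-> RAW).
static void SwapRGB3(const uint8_t src[3], uint8_t dst[3]) {
  dst[0] = src[2];
  dst[1] = src[1];
  dst[2] = src[0];
}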
LIBYUV_API -int I420ToRGB24(const uint8* src_y, +int I420ToRGB24(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgb24, + uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { @@ -675,13 +707,13 @@ int I420ToRGB24(const uint8* src_y, // Convert I420 to RAW. LIBYUV_API -int I420ToRAW(const uint8* src_y, +int I420ToRAW(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_raw, + uint8_t* dst_raw, int dst_stride_raw, int width, int height) { @@ -694,13 +726,13 @@ int I420ToRAW(const uint8* src_y, // Convert H420 to RGB24. LIBYUV_API -int H420ToRGB24(const uint8* src_y, +int H420ToRGB24(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgb24, + uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { @@ -711,13 +743,13 @@ int H420ToRGB24(const uint8* src_y, // Convert H420 to RAW. LIBYUV_API -int H420ToRAW(const uint8* src_y, +int H420ToRAW(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_raw, + uint8_t* dst_raw, int dst_stride_raw, int width, int height) { @@ -730,19 +762,19 @@ int H420ToRAW(const uint8* src_y, // Convert I420 to ARGB1555. LIBYUV_API -int I420ToARGB1555(const uint8* src_y, +int I420ToARGB1555(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb1555, + uint8_t* dst_argb1555, int dst_stride_argb1555, int width, int height) { int y; - void (*I422ToARGB1555Row)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToARGB1555Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGB1555Row_C; if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 || @@ -803,19 +835,19 @@ int I420ToARGB1555(const uint8* src_y, // Convert I420 to ARGB4444. LIBYUV_API -int I420ToARGB4444(const uint8* src_y, +int I420ToARGB4444(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_argb4444, + uint8_t* dst_argb4444, int dst_stride_argb4444, int width, int height) { int y; - void (*I422ToARGB4444Row)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToARGB4444Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGB4444Row_C; if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 || @@ -876,19 +908,19 @@ int I420ToARGB4444(const uint8* src_y, // Convert I420 to RGB565. 
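For reference, the 565 packing the RGB565 converters below produce: 5 bits of blue and red, 6 of green, obtained by truncating the 8-bit channels. A minimal sketch:

#include <stdint.h>

// R in bits 11..15, G in 5..10, B in 0..4.
static uint16_t PackRGB565(uint8_t r, uint8_t g, uint8_t b) {
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}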
LIBYUV_API -int I420ToRGB565(const uint8* src_y, +int I420ToRGB565(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; - void (*I422ToRGB565Row)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { @@ -947,19 +979,19 @@ int I420ToRGB565(const uint8* src_y, // Convert I422 to RGB565. LIBYUV_API -int I422ToRGB565(const uint8* src_y, +int I422ToRGB565(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; - void (*I422ToRGB565Row)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { @@ -1015,30 +1047,30 @@ int I422ToRGB565(const uint8* src_y, } // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. -static const uint8 kDither565_4x4[16] = { +static const uint8_t kDither565_4x4[16] = { 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; // Convert I420 to RGB565 with dithering. 
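The dither table just above is addressed by pixel position, not by value: row (y & 3) supplies four bias bytes, and pixel x within each group of four uses byte (x & 3); the bias (0..7) is added to each 8-bit channel and clamped before truncation to 565. A sketch of the lookup (DitherOffset is an illustrative helper):

#include <stdint.h>

// Matches *(uint32_t*)(dither4x4 + ((y & 3) << 2)) in the row loop: that
// expression fetches the four bytes of table row (y & 3) at once.
static uint8_t DitherOffset(const uint8_t dither4x4[16], int x, int y) {
  return dither4x4[((y & 3) << 2) + (x & 3)];
}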
LIBYUV_API -int I420ToRGB565Dither(const uint8* src_y, +int I420ToRGB565Dither(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, - const uint8* dither4x4, + const uint8_t* dither4x4, int width, int height) { int y; - void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; - void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) = + void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, + const uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -1122,8 +1154,8 @@ int I420ToRGB565Dither(const uint8* src_y, for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, - *(uint32*)(dither4x4 + ((y & 3) << 2)), // NOLINT - width); // NOLINT + *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT + width); // NOLINT dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1137,24 +1169,22 @@ int I420ToRGB565Dither(const uint8* src_y, } // Convert I420 to AR30 with matrix -static int I420ToAR30Matrix(const uint8* src_y, +static int I420ToAR30Matrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToAR30Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = - I422ToARGBRow_C; - void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToAR30Row_C; + I422ToAR30Row_C; if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; @@ -1166,84 +1196,44 @@ static int I420ToAR30Matrix(const uint8* src_y, dst_stride_ar30 = -dst_stride_ar30; } -#if defined(HAS_ARGBTOAR30ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAR30Row = ARGBToAR30Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOAR30ROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAR30Row = ARGBToAR30Row_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_SSSE3) +#if defined(HAS_I422TOAR30ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; + I422ToAR30Row = I422ToAR30Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; + I422ToAR30Row = I422ToAR30Row_SSSE3; } } #endif -#if defined(HAS_I422TOARGBROW_AVX2) +#if defined(HAS_I422TOAR30ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - I422ToARGBRow = I422ToARGBRow_Any_AVX2; + I422ToAR30Row = I422ToAR30Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { - I422ToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if 
(TestCpuFlag(kCpuHasNEON)) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToARGBRow = I422ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_MSA; + I422ToAR30Row = I422ToAR30Row_AVX2; } } #endif - { - // Row buffer for ARGB. - align_buffer_64(row_argb, width * 4); - - for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, row_argb, yuvconstants, width); - ARGBToAR30Row(row_argb, dst_ar30, width); - dst_ar30 += dst_stride_ar30; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } + for (y = 0; y < height; ++y) { + I422ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; } - - free_aligned_buffer_64(row_argb); } return 0; } // Convert I420 to AR30. LIBYUV_API -int I420ToAR30(const uint8* src_y, +int I420ToAR30(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { @@ -1252,20 +1242,37 @@ int I420ToAR30(const uint8* src_y, &kYuvI601Constants, width, height); } +// Convert H420 to AR30. +LIBYUV_API +int H420ToAR30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYvuH709Constants, width, height); +} + // Convert I420 to specified format LIBYUV_API -int ConvertFromI420(const uint8* y, +int ConvertFromI420(const uint8_t* y, int y_stride, - const uint8* u, + const uint8_t* u, int u_stride, - const uint8* v, + const uint8_t* v, int v_stride, - uint8* dst_sample, + uint8_t* dst_sample, int dst_sample_stride, int width, int height, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); + uint32_t fourcc) { + uint32_t format = CanonicalFourCC(fourcc); int r = 0; if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) { return -1; @@ -1338,7 +1345,7 @@ int ConvertFromI420(const uint8* y, height); break; case FOURCC_NV12: { - uint8* dst_uv = dst_sample + width * height; + uint8_t* dst_uv = dst_sample + width * height; r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_uv, dst_sample_stride ? dst_sample_stride : width, width, @@ -1346,7 +1353,7 @@ int ConvertFromI420(const uint8* y, break; } case FOURCC_NV21: { - uint8* dst_vu = dst_sample + width * height; + uint8_t* dst_vu = dst_sample + width * height; r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_vu, dst_sample_stride ? dst_sample_stride : width, width, @@ -1360,8 +1367,8 @@ int ConvertFromI420(const uint8* y, dst_sample_stride = dst_sample_stride ? 
dst_sample_stride : width; int halfstride = (dst_sample_stride + 1) / 2; int halfheight = (height + 1) / 2; - uint8* dst_u; - uint8* dst_v; + uint8_t* dst_u; + uint8_t* dst_v; if (format == FOURCC_YV12) { dst_v = dst_sample + dst_sample_stride * height; dst_u = dst_v + halfstride * halfheight; @@ -1378,8 +1385,8 @@ int ConvertFromI420(const uint8* y, case FOURCC_YV16: { dst_sample_stride = dst_sample_stride ? dst_sample_stride : width; int halfstride = (dst_sample_stride + 1) / 2; - uint8* dst_u; - uint8* dst_v; + uint8_t* dst_u; + uint8_t* dst_v; if (format == FOURCC_YV16) { dst_v = dst_sample + dst_sample_stride * height; dst_u = dst_v + halfstride * height; @@ -1395,8 +1402,8 @@ int ConvertFromI420(const uint8* y, case FOURCC_I444: case FOURCC_YV24: { dst_sample_stride = dst_sample_stride ? dst_sample_stride : width; - uint8* dst_u; - uint8* dst_v; + uint8_t* dst_u; + uint8_t* dst_v; if (format == FOURCC_YV24) { dst_v = dst_sample + dst_sample_stride * height; dst_u = dst_v + dst_sample_stride * height; diff --git a/chromium/third_party/libyuv/source/convert_from_argb.cc b/chromium/third_party/libyuv/source/convert_from_argb.cc index 02e12a12804..16b838458f0 100644 --- a/chromium/third_party/libyuv/source/convert_from_argb.cc +++ b/chromium/third_party/libyuv/source/convert_from_argb.cc @@ -22,21 +22,21 @@ extern "C" { // ARGB little endian (bgra in memory) to I444 LIBYUV_API -int ARGBToI444(const uint8* src_argb, +int ARGBToI444(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUV444Row_C; + void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u, + uint8_t* dst_v, int width) = ARGBToUV444Row_C; if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } @@ -122,20 +122,21 @@ int ARGBToI444(const uint8* src_argb, // ARGB little endian (bgra in memory) to I422 LIBYUV_API -int ARGBToI422(const uint8* src_argb, +int ARGBToI422(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -219,22 +220,23 @@ int ARGBToI422(const uint8* src_argb, } LIBYUV_API -int ARGBToNV12(const uint8* src_argb, +int ARGBToNV12(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void 
(*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; + void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_uv, int width) = MergeUVRow_C; if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -331,7 +333,7 @@ int ARGBToNV12(const uint8* src_argb, { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8* row_v = row_u + ((halfwidth + 31) & ~31); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -354,23 +356,24 @@ int ARGBToNV12(const uint8* src_argb, // Same as NV12 but U and V swapped. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, +int ARGBToNV21(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, - int dst_stride_uv, + uint8_t* dst_vu, + int dst_stride_vu, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; - if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) { + void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_vu, int width) = MergeUVRow_C; + if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -466,20 +469,20 @@ int ARGBToNV21(const uint8* src_argb, { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8* row_v = row_u + ((halfwidth + 31) & ~31); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_uv, halfwidth); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; + dst_vu += dst_stride_vu; } if (height & 1) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_uv, halfwidth); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); ARGBToYRow(src_argb, dst_y, width); } free_aligned_buffer_64(row_u); @@ -489,19 +492,20 @@ int ARGBToNV21(const uint8* src_argb, // Convert ARGB to YUY2. 
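The packed 4:2:2 targets below, for reference: YUY2 stores two pixels in four bytes as Y0,U,Y1,V, and UYVY as U,Y0,V,Y1, with each U,V pair shared by two luma samples. Trivial packers (illustrative only):

#include <stdint.h>

static void PackYUY2(uint8_t y0, uint8_t y1, uint8_t u, uint8_t v,
                     uint8_t dst[4]) {
  dst[0] = y0; dst[1] = u; dst[2] = y1; dst[3] = v;
}

static void PackUYVY(uint8_t y0, uint8_t y1, uint8_t u, uint8_t v,
                     uint8_t dst[4]) {
  dst[0] = u; dst[1] = y0; dst[2] = v; dst[3] = y1;
}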
LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, +int ARGBToYUY2(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yuy2, + uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = + void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) { @@ -579,6 +583,14 @@ int ARGBToYUY2(const uint8* src_argb, } } #endif +#if defined(HAS_I422TOYUY2ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_AVX2; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; @@ -599,8 +611,8 @@ int ARGBToYUY2(const uint8* src_argb, { // Allocate a rows of yuv. align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; + uint8_t* row_u = row_y + ((width + 63) & ~63); + uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -617,19 +629,20 @@ int ARGBToYUY2(const uint8* src_argb, // Convert ARGB to UYVY. LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, +int ARGBToUYVY(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_uyvy, + uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, uint8* dst_u, - uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = + void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; if (!src_argb || !dst_uyvy || width <= 0 || height == 0) { @@ -707,6 +720,14 @@ int ARGBToUYVY(const uint8* src_argb, } } #endif +#if defined(HAS_I422TOUYVYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_AVX2; + } + } +#endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; @@ -727,8 +748,8 @@ int ARGBToUYVY(const uint8* src_argb, { // Allocate a rows of yuv. 
align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; + uint8_t* row_u = row_y + ((width + 63) & ~63); + uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -745,14 +766,14 @@ int ARGBToUYVY(const uint8* src_argb, // Convert ARGB to I400. LIBYUV_API -int ARGBToI400(const uint8* src_argb, +int ARGBToI400(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; if (!src_argb || !dst_y || width <= 0 || height == 0) { return -1; @@ -815,26 +836,26 @@ static const uvec8 kShuffleMaskARGBToRGBA = { // Convert ARGB to RGBA. LIBYUV_API -int ARGBToRGBA(const uint8* src_argb, +int ARGBToRGBA(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, int width, int height) { return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba, - (const uint8*)(&kShuffleMaskARGBToRGBA), width, height); + (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height); } // Convert ARGB To RGB24. LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, +int ARGBToRGB24(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgb24, + uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { int y; - void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) = + void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRGB24Row_C; if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { return -1; @@ -885,14 +906,14 @@ int ARGBToRGB24(const uint8* src_argb, // Convert ARGB To RAW. LIBYUV_API -int ARGBToRAW(const uint8* src_argb, +int ARGBToRAW(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_raw, + uint8_t* dst_raw, int dst_stride_raw, int width, int height) { int y; - void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) = + void (*ARGBToRAWRow)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRAWRow_C; if (!src_argb || !dst_raw || width <= 0 || height == 0) { return -1; @@ -942,22 +963,22 @@ int ARGBToRAW(const uint8* src_argb, } // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. -static const uint8 kDither565_4x4[16] = { +static const uint8_t kDither565_4x4[16] = { 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). 
LIBYUV_API -int ARGBToRGB565Dither(const uint8* src_argb, +int ARGBToRGB565Dither(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, - const uint8* dither4x4, + const uint8_t* dither4x4, int width, int height) { int y; - void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) = + void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, + const uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -1005,7 +1026,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, for (y = 0; y < height; ++y) { ARGBToRGB565DitherRow(src_argb, dst_rgb565, - *(uint32*)(dither4x4 + ((y & 3) << 2)), + *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT width); /* NOLINT */ src_argb += src_stride_argb; dst_rgb565 += dst_stride_rgb565; @@ -1016,15 +1037,15 @@ int ARGBToRGB565Dither(const uint8* src_argb, // Convert ARGB To RGB565. // TODO(fbarchard): Consider using dither function low level with zeros. LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, +int ARGBToRGB565(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; - void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToRGB565Row_C; + void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb, + int width) = ARGBToRGB565Row_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -1082,15 +1103,15 @@ int ARGBToRGB565(const uint8* src_argb, // Convert ARGB To ARGB1555. LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, +int ARGBToARGB1555(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb1555, + uint8_t* dst_argb1555, int dst_stride_argb1555, int width, int height) { int y; - void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToARGB1555Row_C; + void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb, + int width) = ARGBToARGB1555Row_C; if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { return -1; } @@ -1148,15 +1169,15 @@ int ARGBToARGB1555(const uint8* src_argb, // Convert ARGB To ARGB4444. LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, +int ARGBToARGB4444(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb4444, + uint8_t* dst_argb4444, int dst_stride_argb4444, int width, int height) { int y; - void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToARGB4444Row_C; + void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb, + int width) = ARGBToARGB4444Row_C; if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { return -1; } @@ -1212,16 +1233,65 @@ int ARGBToARGB4444(const uint8* src_argb, return 0; } +// Convert ABGR To AR30. +LIBYUV_API +int ABGRToAR30(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + int y; + void (*ABGRToAR30Row)(const uint8_t* src_abgr, uint8_t* dst_rgb, int width) = + ABGRToAR30Row_C; + if (!src_abgr || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } + // Coalesce rows. 
+ if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) { + width *= height; + height = 1; + src_stride_abgr = dst_stride_ar30 = 0; + } +#if defined(HAS_ABGRTOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToAR30Row = ABGRToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ABGRToAR30Row = ABGRToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToAR30Row = ABGRToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ABGRToAR30Row = ABGRToAR30Row_AVX2; + } + } +#endif + for (y = 0; y < height; ++y) { + ABGRToAR30Row(src_abgr, dst_ar30, width); + src_abgr += src_stride_abgr; + dst_ar30 += dst_stride_ar30; + } + return 0; +} + // Convert ARGB To AR30. LIBYUV_API -int ARGBToAR30(const uint8* src_argb, +int ARGBToAR30(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_ar30, + uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { int y; - void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) = + void (*ARGBToAR30Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToAR30Row_C; if (!src_argb || !dst_ar30 || width <= 0 || height == 0) { return -1; @@ -1263,20 +1333,21 @@ int ARGBToAR30(const uint8* src_argb, // Convert ARGB to J420. (JPeg full range I420). LIBYUV_API -int ARGBToJ420(const uint8* src_argb, +int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yj, + uint8_t* dst_yj, int dst_stride_yj, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -1356,20 +1427,21 @@ int ARGBToJ420(const uint8* src_argb, // Convert ARGB to J422. (JPeg full range I422). LIBYUV_API -int ARGBToJ422(const uint8* src_argb, +int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yj, + uint8_t* dst_yj, int dst_stride_yj, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -1451,14 +1523,14 @@ int ARGBToJ422(const uint8* src_argb, // Convert ARGB to J400. 
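The new ABGRToAR30 above uses the same "coalesce rows" trick as the existing converters: when both strides equal width * 4 the frame is contiguous in memory, so it can be processed as one very wide row, invoking the row function once instead of once per scanline. The transformation, annotated:

  // Coalesce rows: a contiguous image is just one very wide row.
  if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) {
    width *= height;  // one "row" covering the whole frame
    height = 1;
    src_stride_abgr = dst_stride_ar30 = 0;  // loop no longer advances rows
  }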
LIBYUV_API -int ARGBToJ400(const uint8* src_argb, +int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_yj, + uint8_t* dst_yj, int dst_stride_yj, int width, int height) { int y; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; if (!src_argb || !dst_yj || width <= 0 || height == 0) { return -1; diff --git a/chromium/third_party/libyuv/source/convert_jpeg.cc b/chromium/third_party/libyuv/source/convert_jpeg.cc index 216a9f26d87..c91b43dc226 100644 --- a/chromium/third_party/libyuv/source/convert_jpeg.cc +++ b/chromium/third_party/libyuv/source/convert_jpeg.cc @@ -22,18 +22,18 @@ extern "C" { #ifdef HAVE_JPEG struct I420Buffers { - uint8* y; + uint8_t* y; int y_stride; - uint8* u; + uint8_t* u; int u_stride; - uint8* v; + uint8_t* v; int v_stride; int w; int h; }; static void JpegCopyI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); @@ -47,7 +47,7 @@ static void JpegCopyI420(void* opaque, } static void JpegI422ToI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); @@ -61,7 +61,7 @@ static void JpegI422ToI420(void* opaque, } static void JpegI444ToI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); @@ -75,7 +75,7 @@ static void JpegI444ToI420(void* opaque, } static void JpegI400ToI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); @@ -89,7 +89,10 @@ static void JpegI400ToI420(void* opaque, // Query size of MJPG in pixels. LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height) { +int MJPGSize(const uint8_t* sample, + size_t sample_size, + int* width, + int* height) { MJpegDecoder mjpeg_decoder; LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); if (ret) { @@ -103,13 +106,13 @@ int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height) { // MJPG (Motion JPeg) to I420 // TODO(fbarchard): review w and h requirement. dw and dh may be enough. 
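A caller-side sketch for the MJPGSize and MJPGToI420 entry points shown here, after the uint8_t migration. The trailing destination width/height parameters of MJPGToI420 are cut off by the hunk below; this sketch assumes they follow w and h as in the library headers, and that MJPGSize returns 0 on success:

  #include <stdlib.h>
  #include "libyuv.h"  // umbrella header: MJPGSize, MJPGToI420

  // Decode one MJPEG frame into freshly allocated I420 planes.
  int DecodeMjpegToI420(const uint8_t* sample, size_t sample_size) {
    int w = 0, h = 0;
    if (MJPGSize(sample, sample_size, &w, &h) != 0) {
      return -1;  // not a parseable JPEG
    }
    int half_w = (w + 1) / 2;
    int half_h = (h + 1) / 2;
    uint8_t* y = (uint8_t*)malloc((size_t)w * h);
    uint8_t* u = (uint8_t*)malloc((size_t)half_w * half_h);
    uint8_t* v = (uint8_t*)malloc((size_t)half_w * half_h);
    int r = MJPGToI420(sample, sample_size,
                       y, w,        // dst_y, dst_stride_y
                       u, half_w,   // dst_u, dst_stride_u
                       v, half_w,   // dst_v, dst_stride_v
                       w, h,        // source dimensions
                       w, h);       // destination dimensions (assumed params)
    // Real code would keep the planes on success; freed here for brevity.
    free(y);
    free(u);
    free(v);
    return r;
  }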
LIBYUV_API -int MJPGToI420(const uint8* sample, +int MJPGToI420(const uint8_t* sample, size_t sample_size, - uint8* y, + uint8_t* y, int y_stride, - uint8* u, + uint8_t* u, int u_stride, - uint8* v, + uint8_t* v, int v_stride, int w, int h, @@ -183,14 +186,14 @@ int MJPGToI420(const uint8* sample, #ifdef HAVE_JPEG struct ARGBBuffers { - uint8* argb; + uint8_t* argb; int argb_stride; int w; int h; }; static void JpegI420ToARGB(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); @@ -201,7 +204,7 @@ static void JpegI420ToARGB(void* opaque, } static void JpegI422ToARGB(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); @@ -212,7 +215,7 @@ static void JpegI422ToARGB(void* opaque, } static void JpegI444ToARGB(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); @@ -223,7 +226,7 @@ static void JpegI444ToARGB(void* opaque, } static void JpegI400ToARGB(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); @@ -235,9 +238,9 @@ static void JpegI400ToARGB(void* opaque, // MJPG (Motion JPeg) to ARGB // TODO(fbarchard): review w and h requirement. dw and dh may be enough. LIBYUV_API -int MJPGToARGB(const uint8* sample, +int MJPGToARGB(const uint8_t* sample, size_t sample_size, - uint8* argb, + uint8_t* argb, int argb_stride, int w, int h, diff --git a/chromium/third_party/libyuv/source/convert_to_argb.cc b/chromium/third_party/libyuv/source/convert_to_argb.cc index 63a5104b3c7..677e5d56fcc 100644 --- a/chromium/third_party/libyuv/source/convert_to_argb.cc +++ b/chromium/third_party/libyuv/source/convert_to_argb.cc @@ -29,10 +29,10 @@ extern "C" { // sample_size is measured in bytes and is the size of the frame. // With MJPEG it is the compressed size of the frame. LIBYUV_API -int ConvertToARGB(const uint8* sample, +int ConvertToARGB(const uint8_t* sample, size_t sample_size, - uint8* crop_argb, - int argb_stride, + uint8_t* dst_argb, + int dst_stride_argb, int crop_x, int crop_y, int src_width, @@ -40,11 +40,11 @@ int ConvertToARGB(const uint8* sample, int crop_width, int crop_height, enum RotationMode rotation, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); + uint32_t fourcc) { + uint32_t format = CanonicalFourCC(fourcc); int aligned_src_width = (src_width + 1) & ~1; - const uint8* src; - const uint8* src_uv; + const uint8_t* src; + const uint8_t* src_uv; int abs_src_height = (src_height < 0) ? -src_height : src_height; int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height; int r = 0; @@ -52,17 +52,17 @@ int ConvertToARGB(const uint8* sample, // One pass rotation is available for some formats. For the rest, convert // to ARGB (with optional vertical flipping) into a temporary ARGB buffer, // and then rotate the ARGB to the final destination buffer. - // For in-place conversion, if destination crop_argb is same as source sample, + // For in-place conversion, if destination dst_argb is same as source sample, // also enable temporary buffer. 
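The ConvertToARGB signature above renames crop_argb/argb_stride to dst_argb/dst_stride_argb without changing the parameter order. A usage sketch, cropping a window out of a YUY2 capture (FOURCC_YUY2 and kRotate0 come from the public headers):

  #include "libyuv.h"  // ConvertToARGB, kRotate0, FOURCC_YUY2

  // Crop a 640x360 window at (320, 180) out of a 1280x720 YUY2 frame.
  int CropYuy2ToArgb(const uint8_t* frame, size_t frame_size,
                     uint8_t* dst_argb) {
    return ConvertToARGB(frame, frame_size,
                         dst_argb, 640 * 4,  // dst_argb, dst_stride_argb
                         320, 180,           // crop_x, crop_y
                         1280, 720,          // src_width, src_height
                         640, 360,           // crop_width, crop_height
                         kRotate0, FOURCC_YUY2);
  }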
LIBYUV_BOOL need_buf = - (rotation && format != FOURCC_ARGB) || crop_argb == sample; - uint8* dest_argb = crop_argb; - int dest_argb_stride = argb_stride; - uint8* rotate_buffer = NULL; + (rotation && format != FOURCC_ARGB) || dst_argb == sample; + uint8_t* dest_argb = dst_argb; + int dest_dst_stride_argb = dst_stride_argb; + uint8_t* rotate_buffer = NULL; int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; - if (crop_argb == NULL || sample == NULL || src_width <= 0 || - crop_width <= 0 || src_height == 0 || crop_height == 0) { + if (dst_argb == NULL || sample == NULL || src_width <= 0 || crop_width <= 0 || + src_height == 0 || crop_height == 0) { return -1; } if (src_height < 0) { @@ -71,76 +71,76 @@ int ConvertToARGB(const uint8* sample, if (need_buf) { int argb_size = crop_width * 4 * abs_crop_height; - rotate_buffer = (uint8*)malloc(argb_size); /* NOLINT */ + rotate_buffer = (uint8_t*)malloc(argb_size); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. } - crop_argb = rotate_buffer; - argb_stride = crop_width * 4; + dst_argb = rotate_buffer; + dst_stride_argb = crop_width * 4; } switch (format) { // Single plane formats case FOURCC_YUY2: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = YUY2ToARGB(src, aligned_src_width * 2, crop_argb, argb_stride, + r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_UYVY: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = UYVYToARGB(src, aligned_src_width * 2, crop_argb, argb_stride, + r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_24BG: src = sample + (src_width * crop_y + crop_x) * 3; - r = RGB24ToARGB(src, src_width * 3, crop_argb, argb_stride, crop_width, + r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RAW: src = sample + (src_width * crop_y + crop_x) * 3; - r = RAWToARGB(src, src_width * 3, crop_argb, argb_stride, crop_width, + r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_ARGB: if (!need_buf && !rotation) { src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width, - inv_crop_height); + r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb, + crop_width, inv_crop_height); } break; case FOURCC_BGRA: src = sample + (src_width * crop_y + crop_x) * 4; - r = BGRAToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width, + r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_ABGR: src = sample + (src_width * crop_y + crop_x) * 4; - r = ABGRToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width, + r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBA: src = sample + (src_width * crop_y + crop_x) * 4; - r = RGBAToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width, + r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBP: src = sample + (src_width * crop_y + crop_x) * 2; - r = RGB565ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width, - inv_crop_height); + r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, + crop_width, inv_crop_height); break; case FOURCC_RGBO: src = sample + (src_width * crop_y + crop_x) * 2; - r = 
ARGB1555ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width, - inv_crop_height); + r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, + crop_width, inv_crop_height); break; case FOURCC_R444: src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB4444ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width, - inv_crop_height); + r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, + crop_width, inv_crop_height); break; case FOURCC_I400: src = sample + src_width * crop_y + crop_x; - r = I400ToARGB(src, src_width, crop_argb, argb_stride, crop_width, + r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -148,27 +148,27 @@ int ConvertToARGB(const uint8* sample, case FOURCC_NV12: src = sample + (src_width * crop_y + crop_x); src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; - r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, crop_argb, - argb_stride, crop_width, inv_crop_height); + r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, + dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_NV21: src = sample + (src_width * crop_y + crop_x); src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; // Call NV12 but with u and v parameters swapped. - r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, crop_argb, - argb_stride, crop_width, inv_crop_height); + r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, + dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_M420: src = sample + (src_width * crop_y) * 12 / 8 + crop_x; - r = M420ToARGB(src, src_width, crop_argb, argb_stride, crop_width, + r = M420ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { @@ -183,14 +183,14 @@ int ConvertToARGB(const uint8* sample, halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, - crop_argb, argb_stride, crop_width, inv_crop_height); + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_J420: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; src_u = sample + src_width * abs_src_height + @@ -198,15 +198,15 @@ int ConvertToARGB(const uint8* sample, src_v = sample + src_width * abs_src_height + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, - crop_argb, argb_stride, crop_width, inv_crop_height); + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_I422: case FOURCC_YV16: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; if (format == FOURCC_YV16) { src_v = 
sample + src_width * abs_src_height + halfwidth * crop_y + @@ -220,14 +220,14 @@ int ConvertToARGB(const uint8* sample, halfwidth * (abs_src_height + crop_y) + crop_x / 2; } r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, - crop_argb, argb_stride, crop_width, inv_crop_height); + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_I444: case FOURCC_YV24: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; if (format == FOURCC_YV24) { src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; @@ -236,12 +236,12 @@ int ConvertToARGB(const uint8* sample, src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, - crop_argb, argb_stride, crop_width, inv_crop_height); + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } #ifdef HAVE_JPEG case FOURCC_MJPG: - r = MJPGToARGB(sample, sample_size, crop_argb, argb_stride, src_width, + r = MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width, abs_src_height, crop_width, inv_crop_height); break; #endif @@ -251,13 +251,13 @@ int ConvertToARGB(const uint8* sample, if (need_buf) { if (!r) { - r = ARGBRotate(crop_argb, argb_stride, dest_argb, dest_argb_stride, + r = ARGBRotate(dst_argb, dst_stride_argb, dest_argb, dest_dst_stride_argb, crop_width, abs_crop_height, rotation); } free(rotate_buffer); } else if (rotation) { src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBRotate(src, src_width * 4, crop_argb, argb_stride, crop_width, + r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height, rotation); } diff --git a/chromium/third_party/libyuv/source/convert_to_i420.cc b/chromium/third_party/libyuv/source/convert_to_i420.cc index a50689db949..1bed9d6440d 100644 --- a/chromium/third_party/libyuv/source/convert_to_i420.cc +++ b/chromium/third_party/libyuv/source/convert_to_i420.cc @@ -25,14 +25,14 @@ extern "C" { // sample_size is measured in bytes and is the size of the frame. // With MJPEG it is the compressed size of the frame. LIBYUV_API -int ConvertToI420(const uint8* sample, +int ConvertToI420(const uint8_t* sample, size_t sample_size, - uint8* y, - int y_stride, - uint8* u, - int u_stride, - uint8* v, - int v_stride, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, int crop_x, int crop_y, int src_width, @@ -40,11 +40,11 @@ int ConvertToI420(const uint8* sample, int crop_width, int crop_height, enum RotationMode rotation, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); + uint32_t fourcc) { + uint32_t format = CanonicalFourCC(fourcc); int aligned_src_width = (src_width + 1) & ~1; - const uint8* src; - const uint8* src_uv; + const uint8_t* src; + const uint8_t* src_uv; const int abs_src_height = (src_height < 0) ? -src_height : src_height; // TODO(nisse): Why allow crop_height < 0? const int abs_crop_height = (crop_height < 0) ? 
-crop_height : crop_height; @@ -52,132 +52,143 @@ int ConvertToI420(const uint8* sample, LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 && format != FOURCC_NV12 && format != FOURCC_NV21 && format != FOURCC_YV12) || - y == sample; - uint8* tmp_y = y; - uint8* tmp_u = u; - uint8* tmp_v = v; - int tmp_y_stride = y_stride; - int tmp_u_stride = u_stride; - int tmp_v_stride = v_stride; - uint8* rotate_buffer = NULL; + dst_y == sample; + uint8_t* tmp_y = dst_y; + uint8_t* tmp_u = dst_u; + uint8_t* tmp_v = dst_v; + int tmp_y_stride = dst_stride_y; + int tmp_u_stride = dst_stride_u; + int tmp_v_stride = dst_stride_v; + uint8_t* rotate_buffer = NULL; const int inv_crop_height = (src_height < 0) ? -abs_crop_height : abs_crop_height; - if (!y || !u || !v || !sample || src_width <= 0 || crop_width <= 0 || - src_height == 0 || crop_height == 0) { + if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 || + crop_width <= 0 || src_height == 0 || crop_height == 0) { return -1; } // One pass rotation is available for some formats. For the rest, convert // to I420 (with optional vertical flipping) into a temporary I420 buffer, // and then rotate the I420 to the final destination buffer. - // For in-place conversion, if destination y is same as source sample, + // For in-place conversion, if destination dst_y is same as source sample, // also enable temporary buffer. if (need_buf) { int y_size = crop_width * abs_crop_height; int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); - rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); /* NOLINT */ + rotate_buffer = (uint8_t*)malloc(y_size + uv_size * 2); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. } - y = rotate_buffer; - u = y + y_size; - v = u + uv_size; - y_stride = crop_width; - u_stride = v_stride = ((crop_width + 1) / 2); + dst_y = rotate_buffer; + dst_u = dst_y + y_size; + dst_v = dst_u + uv_size; + dst_stride_y = crop_width; + dst_stride_u = dst_stride_v = ((crop_width + 1) / 2); } switch (format) { // Single plane formats case FOURCC_YUY2: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = YUY2ToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, v, - v_stride, crop_width, inv_crop_height); + r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_UYVY: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = UYVYToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, v, - v_stride, crop_width, inv_crop_height); + r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RGBP: src = sample + (src_width * crop_y + crop_x) * 2; - r = RGB565ToI420(src, src_width * 2, y, y_stride, u, u_stride, v, - v_stride, crop_width, inv_crop_height); + r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RGBO: src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB1555ToI420(src, src_width * 2, y, y_stride, u, u_stride, v, - v_stride, crop_width, inv_crop_height); + r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_R444: src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB4444ToI420(src, src_width * 2, y, y_stride, u, u_stride, v, - v_stride, crop_width, 
inv_crop_height); + r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_24BG: src = sample + (src_width * crop_y + crop_x) * 3; - r = RGB24ToI420(src, src_width * 3, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RAW: src = sample + (src_width * crop_y + crop_x) * 3; - r = RAWToI420(src, src_width * 3, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_ARGB: src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_BGRA: src = sample + (src_width * crop_y + crop_x) * 4; - r = BGRAToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_ABGR: src = sample + (src_width * crop_y + crop_x) * 4; - r = ABGRToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RGBA: src = sample + (src_width * crop_y + crop_x) * 4; - r = RGBAToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_I400: src = sample + src_width * crop_y + crop_x; - r = I400ToI420(src, src_width, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, crop_width, inv_crop_height); break; // Biplanar formats case FOURCC_NV12: src = sample + (src_width * crop_y + crop_x); src_uv = sample + (src_width * src_height) + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); - r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, y, - y_stride, u, u_stride, v, v_stride, crop_width, - inv_crop_height, rotation); + r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height, rotation); break; case FOURCC_NV21: src = sample + (src_width * crop_y + crop_x); src_uv = sample + (src_width * src_height) + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); - // Call NV12 but with u and v parameters swapped. - r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, y, - y_stride, v, v_stride, u, u_stride, crop_width, - inv_crop_height, rotation); + // Call NV12 but with dst_u and dst_v parameters swapped. 
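NV21 is NV12 with the chroma bytes in VU order, so the converter above simply reuses NV12ToI420Rotate with dst_u and dst_v swapped rather than carrying a separate code path. A caller-side sketch of the renamed ConvertToI420 parameters, rotating a full NV21 frame; note that the destination strides here describe the rotated 720x1280 output, an assumption to check against the rotation docs:

  #include "libyuv.h"  // ConvertToI420, kRotate90, FOURCC_NV21

  // Full-frame 1280x720 NV21 -> I420, rotated 90 degrees clockwise.
  int Nv21ToI420Rot90(const uint8_t* sample, size_t sample_size,
                      uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v) {
    return ConvertToI420(sample, sample_size,
                         dst_y, 720,   // strides are for the rotated
                         dst_u, 360,   // 720x1280 destination
                         dst_v, 360,
                         0, 0,         // crop_x, crop_y
                         1280, 720,    // src_width, src_height
                         1280, 720,    // crop_width, crop_height
                         kRotate90, FOURCC_NV21);
  }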
+ r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, + dst_stride_y, dst_v, dst_stride_v, dst_u, + dst_stride_u, crop_width, inv_crop_height, rotation); break; case FOURCC_M420: src = sample + (src_width * crop_y) * 12 / 8 + crop_x; - r = M420ToI420(src, src_width, y, y_stride, u, u_stride, v, v_stride, - crop_width, inv_crop_height); + r = M420ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, crop_width, inv_crop_height); break; // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { @@ -191,16 +202,16 @@ int ConvertToI420(const uint8* sample, src_v = sample + src_width * abs_src_height + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } - r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, y, - y_stride, u, u_stride, v, v_stride, crop_width, - inv_crop_height, rotation); + r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height, rotation); break; } case FOURCC_I422: case FOURCC_YV16: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; if (format == FOURCC_YV16) { src_v = sample + src_width * abs_src_height + halfwidth * crop_y + @@ -213,16 +224,16 @@ int ConvertToI420(const uint8* sample, src_v = sample + src_width * abs_src_height + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } - r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, y, - y_stride, u, u_stride, v, v_stride, crop_width, - inv_crop_height); + r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height); break; } case FOURCC_I444: case FOURCC_YV24: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; if (format == FOURCC_YV24) { src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; @@ -230,15 +241,16 @@ int ConvertToI420(const uint8* sample, src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } - r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width, y, - y_stride, u, u_stride, v, v_stride, crop_width, - inv_crop_height); + r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height); break; } #ifdef HAVE_JPEG case FOURCC_MJPG: - r = MJPGToI420(sample, sample_size, y, y_stride, u, u_stride, v, v_stride, - src_width, abs_src_height, crop_width, inv_crop_height); + r = MJPGToI420(sample, sample_size, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, src_width, + abs_src_height, crop_width, inv_crop_height); break; #endif default: @@ -247,9 
+259,10 @@ int ConvertToI420(const uint8* sample, if (need_buf) { if (!r) { - r = I420Rotate(y, y_stride, u, u_stride, v, v_stride, tmp_y, tmp_y_stride, - tmp_u, tmp_u_stride, tmp_v, tmp_v_stride, crop_width, - abs_crop_height, rotation); + r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride, + tmp_v, tmp_v_stride, crop_width, abs_crop_height, + rotation); } free(rotate_buffer); } diff --git a/chromium/third_party/libyuv/source/cpu_id.cc b/chromium/third_party/libyuv/source/cpu_id.cc index d08fc365988..446aad12078 100644 --- a/chromium/third_party/libyuv/source/cpu_id.cc +++ b/chromium/third_party/libyuv/source/cpu_id.cc @@ -27,8 +27,6 @@ #include <stdio.h> #include <string.h> -#include "libyuv/basic_types.h" // For CPU_X86 - #ifdef __cplusplus namespace libyuv { extern "C" { @@ -218,7 +216,9 @@ static LIBYUV_BOOL TestEnv(const char*) { static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info = 0; -#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) +#if !defined(__pnacl__) && !defined(__CLR_VER) && \ + (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ + defined(_M_IX86)) int cpu_info0[4] = {0, 0, 0, 0}; int cpu_info1[4] = {0, 0, 0, 0}; int cpu_info7[4] = {0, 0, 0, 0}; diff --git a/chromium/third_party/libyuv/source/mjpeg_decoder.cc b/chromium/third_party/libyuv/source/mjpeg_decoder.cc index b43c008bdd2..eaf2530130b 100644 --- a/chromium/third_party/libyuv/source/mjpeg_decoder.cc +++ b/chromium/third_party/libyuv/source/mjpeg_decoder.cc @@ -102,7 +102,7 @@ MJpegDecoder::~MJpegDecoder() { DestroyOutputBuffers(); } -LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) { +LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) { if (!ValidateJpeg(src, src_len)) { return LIBYUV_FALSE; } @@ -129,7 +129,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) { if (scanlines_[i]) { delete scanlines_[i]; } - scanlines_[i] = new uint8*[scanlines_size]; + scanlines_[i] = new uint8_t*[scanlines_size]; scanlines_sizes_[i] = scanlines_size; } @@ -145,7 +145,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) { if (databuf_[i]) { delete databuf_[i]; } - databuf_[i] = new uint8[databuf_size]; + databuf_[i] = new uint8_t[databuf_size]; databuf_strides_[i] = databuf_stride; } @@ -243,7 +243,7 @@ LIBYUV_BOOL MJpegDecoder::UnloadFrame() { } // TODO(fbarchard): Allow rectangle to be specified: x, y, width, height. -LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8** planes, +LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height) { if (dst_width != GetWidth() || dst_height > GetHeight()) { @@ -469,9 +469,9 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) { // it. 
DestroyOutputBuffers(); - scanlines_ = new uint8**[num_outbufs]; + scanlines_ = new uint8_t**[num_outbufs]; scanlines_sizes_ = new int[num_outbufs]; - databuf_ = new uint8*[num_outbufs]; + databuf_ = new uint8_t*[num_outbufs]; databuf_strides_ = new int[num_outbufs]; for (int i = 0; i < num_outbufs; ++i) { @@ -527,9 +527,9 @@ LIBYUV_BOOL MJpegDecoder::FinishDecode() { return LIBYUV_TRUE; } -void MJpegDecoder::SetScanlinePointers(uint8** data) { +void MJpegDecoder::SetScanlinePointers(uint8_t** data) { for (int i = 0; i < num_outbufs_; ++i) { - uint8* data_i = data[i]; + uint8_t* data_i = data[i]; for (int j = 0; j < scanlines_sizes_[i]; ++j) { scanlines_[i][j] = data_i; data_i += GetComponentStride(i); @@ -552,13 +552,13 @@ JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper( if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 && subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) { return kJpegYuv420; - } else if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 2 && subsample_y[1] == 1 && - subsample_x[2] == 2 && subsample_y[2] == 1) { + } + if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 && + subsample_y[1] == 1 && subsample_x[2] == 2 && subsample_y[2] == 1) { return kJpegYuv422; - } else if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 1 && subsample_y[1] == 1 && - subsample_x[2] == 1 && subsample_y[2] == 1) { + } + if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 1 && + subsample_y[1] == 1 && subsample_x[2] == 1 && subsample_y[2] == 1) { return kJpegYuv444; } } else if (number_of_components == 1) { // Grey-scale images. diff --git a/chromium/third_party/libyuv/source/mjpeg_validate.cc b/chromium/third_party/libyuv/source/mjpeg_validate.cc index bd760425359..80c2cc0cb9b 100644 --- a/chromium/third_party/libyuv/source/mjpeg_validate.cc +++ b/chromium/third_party/libyuv/source/mjpeg_validate.cc @@ -18,13 +18,13 @@ extern "C" { #endif // Helper function to scan for EOI marker (0xff 0xd9). -static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) { +static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) { if (sample_size >= 2) { - const uint8* end = sample + sample_size - 1; - const uint8* it = sample; + const uint8_t* end = sample + sample_size - 1; + const uint8_t* it = sample; while (it < end) { // TODO(fbarchard): scan for 0xd9 instead. - it = (const uint8*)(memchr(it, 0xff, end - it)); + it = (const uint8_t*)(memchr(it, 0xff, end - it)); if (it == NULL) { break; } @@ -39,7 +39,7 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) { } // Helper function to validate the jpeg appears intact. -LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) { +LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size) { // Maximum size that ValidateJpeg will consider valid. 
const size_t kMaxJpegSize = 0x7fffffffull; const size_t kBackSearchSize = 1024; diff --git a/chromium/third_party/libyuv/source/planar_functions.cc b/chromium/third_party/libyuv/source/planar_functions.cc index c55ef7f2742..77d71633f57 100644 --- a/chromium/third_party/libyuv/source/planar_functions.cc +++ b/chromium/third_party/libyuv/source/planar_functions.cc @@ -26,14 +26,14 @@ extern "C" { // Copy a plane of data LIBYUV_API -void CopyPlane(const uint8* src_y, +void CopyPlane(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; + void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; // Negative height means invert the image. if (height < 0) { height = -height; @@ -83,14 +83,14 @@ void CopyPlane(const uint8* src_y, // TODO(fbarchard): Consider support for negative height. // TODO(fbarchard): Consider stride measured in bytes. LIBYUV_API -void CopyPlane_16(const uint16* src_y, +void CopyPlane_16(const uint16_t* src_y, int src_stride_y, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, int width, int height) { int y; - void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C; + void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C; // Coalesce rows. if (src_stride_y == width && dst_stride_y == width) { width *= height; @@ -123,15 +123,15 @@ void CopyPlane_16(const uint16* src_y, // Convert a plane of 16 bit data to 8 bit LIBYUV_API -void Convert16To8Plane(const uint16* src_y, +void Convert16To8Plane(const uint16_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int scale, // 16384 for 10 bits int width, int height) { int y; - void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale, + void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) = Convert16To8Row_C; // Negative height means invert the image. @@ -173,15 +173,15 @@ void Convert16To8Plane(const uint16* src_y, // Convert a plane of 8 bit data to 16 bit LIBYUV_API -void Convert8To16Plane(const uint8* src_y, +void Convert8To16Plane(const uint8_t* src_y, int src_stride_y, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, int scale, // 16384 for 10 bits int width, int height) { int y; - void (*Convert8To16Row)(const uint8* src_y, uint16* dst_y, int scale, + void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) = Convert8To16Row_C; // Negative height means invert the image. @@ -223,17 +223,17 @@ void Convert8To16Plane(const uint8* src_y, // Copy I422. LIBYUV_API -int I422Copy(const uint8* src_y, +int I422Copy(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -262,17 +262,17 @@ int I422Copy(const uint8* src_y, // Copy I444. 
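"Negative height means invert the image" above is a library-wide convention: a negative height makes the function walk the rows bottom-up, so a vertical flip costs nothing extra. For example, with the CopyPlane shown above:

  // Copy a grayscale plane while flipping it vertically.
  CopyPlane(src, src_stride, dst, dst_stride, width, -height);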
LIBYUV_API -int I444Copy(const uint8* src_y, +int I444Copy(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -300,9 +300,9 @@ int I444Copy(const uint8* src_y, // Copy I400. LIBYUV_API -int I400ToI400(const uint8* src_y, +int I400ToI400(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { @@ -321,13 +321,13 @@ int I400ToI400(const uint8* src_y, // Convert I420 to I400. LIBYUV_API -int I420ToI400(const uint8* src_y, +int I420ToI400(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { @@ -352,16 +352,16 @@ int I420ToI400(const uint8* src_y, // Support function for NV12 etc UV channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API -void SplitUVPlane(const uint8* src_uv, +void SplitUVPlane(const uint8_t* src_uv, int src_stride_uv, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; // Negative height means invert the image. if (height < 0) { @@ -421,17 +421,17 @@ void SplitUVPlane(const uint8* src_uv, } LIBYUV_API -void MergeUVPlane(const uint8* src_u, +void MergeUVPlane(const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; - void (*MergeUVRow)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; + void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_uv, int width) = MergeUVRow_C; // Coalesce rows. // Negative height means invert the image. if (height < 0) { @@ -491,19 +491,19 @@ void MergeUVPlane(const uint8* src_u, // Support function for NV12 etc RGB channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API -void SplitRGBPlane(const uint8* src_rgb, +void SplitRGBPlane(const uint8_t* src_rgb, int src_stride_rgb, - uint8* dst_r, + uint8_t* dst_r, int dst_stride_r, - uint8* dst_g, + uint8_t* dst_g, int dst_stride_g, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height) { int y; - void (*SplitRGBRow)(const uint8* src_rgb, uint8* dst_r, uint8* dst_g, - uint8* dst_b, int width) = SplitRGBRow_C; + void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, + uint8_t* dst_b, int width) = SplitRGBRow_C; // Negative height means invert the image. 
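SplitUVPlane and MergeUVPlane above are the primitives behind NV12 <-> I420 chroma repacking: NV12 interleaves UVUV... in one half-resolution plane, while I420 keeps U and V separate. As the comment notes, width and height here are plane sizes, typically (w + 1) / 2 and (h + 1) / 2 of the frame:

  // Deinterleave NV12 chroma into I420-style U and V planes.
  SplitUVPlane(src_uv, src_stride_uv,
               dst_u, half_w,
               dst_v, half_w,
               half_w, half_h);

  // The inverse: interleave planar U and V back into an NV12 UV plane.
  MergeUVPlane(src_u, half_w,
               src_v, half_w,
               dst_uv, half_w * 2,
               half_w, half_h);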
if (height < 0) { height = -height; @@ -549,19 +549,19 @@ void SplitRGBPlane(const uint8* src_rgb, } LIBYUV_API -void MergeRGBPlane(const uint8* src_r, +void MergeRGBPlane(const uint8_t* src_r, int src_stride_r, - const uint8* src_g, + const uint8_t* src_g, int src_stride_g, - const uint8* src_b, + const uint8_t* src_b, int src_stride_b, - uint8* dst_rgb, + uint8_t* dst_rgb, int dst_stride_rgb, int width, int height) { int y; - void (*MergeRGBRow)(const uint8* src_r, const uint8* src_g, - const uint8* src_b, uint8* dst_rgb, int width) = + void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g, + const uint8_t* src_b, uint8_t* dst_rgb, int width) = MergeRGBRow_C; // Coalesce rows. // Negative height means invert the image. @@ -605,14 +605,14 @@ void MergeRGBPlane(const uint8* src_r, } // Mirror a plane of data. -void MirrorPlane(const uint8* src_y, +void MirrorPlane(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; - void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; + void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; // Negative height means invert the image. if (height < 0) { height = -height; @@ -662,20 +662,20 @@ void MirrorPlane(const uint8* src_y, // Convert YUY2 to I422. LIBYUV_API -int YUY2ToI422(const uint8* src_yuy2, +int YUY2ToI422(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, - int width) = YUY2ToUV422Row_C; - void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) = + void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u, + uint8_t* dst_v, int width) = YUY2ToUV422Row_C; + void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -748,20 +748,20 @@ int YUY2ToI422(const uint8* src_yuy2, // Convert UYVY to I422. LIBYUV_API -int UYVYToI422(const uint8* src_uyvy, +int UYVYToI422(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; - void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, - int width) = UYVYToUV422Row_C; - void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int width) = + void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u, + uint8_t* dst_v, int width) = UYVYToUV422Row_C; + void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = UYVYToYRow_C; if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -834,14 +834,14 @@ int UYVYToI422(const uint8* src_uyvy, // Convert YUY2 to Y. 
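YUY2 packs two pixels as Y0 U Y1 V (4:2:2), which is why YUY2ToI422 above produces a full-resolution Y plane plus half-width, full-height U and V planes. A usage sketch:

  int half_w = (width + 1) / 2;
  // Source stride is width * 2: two bytes per pixel in YUY2.
  YUY2ToI422(src_yuy2, width * 2,
             dst_y, width,
             dst_u, half_w,
             dst_v, half_w,
             width, height);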
LIBYUV_API -int YUY2ToY(const uint8* src_yuy2, +int YUY2ToY(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; - void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) = + void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; if (!src_yuy2 || !dst_y || width <= 0 || height == 0) { return -1; @@ -901,9 +901,9 @@ int YUY2ToY(const uint8* src_yuy2, // Mirror I400 with optional flipping LIBYUV_API -int I400Mirror(const uint8* src_y, +int I400Mirror(const uint8_t* src_y, int src_stride_y, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { @@ -923,17 +923,17 @@ int I400Mirror(const uint8* src_y, // Mirror I420 with optional flipping LIBYUV_API -int I420Mirror(const uint8* src_y, +int I420Mirror(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { @@ -965,14 +965,14 @@ int I420Mirror(const uint8* src_y, // ARGB mirror. LIBYUV_API -int ARGBMirror(const uint8* src_argb, +int ARGBMirror(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = + void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) = ARGBMirrorRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1030,8 +1030,8 @@ int ARGBMirror(const uint8* src_argb, // the same blend function for all pixels if possible. LIBYUV_API ARGBBlendRow GetARGBBlend() { - void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width) = ARGBBlendRow_C; + void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, + uint8_t* dst_argb, int width) = ARGBBlendRow_C; #if defined(HAS_ARGBBLENDROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBBlendRow = ARGBBlendRow_SSSE3; @@ -1053,17 +1053,17 @@ ARGBBlendRow GetARGBBlend() { // Alpha Blend 2 ARGB images and store to destination. LIBYUV_API -int ARGBBlend(const uint8* src_argb0, +int ARGBBlend(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width) = GetARGBBlend(); + void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, + uint8_t* dst_argb, int width) = GetARGBBlend(); if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1092,19 +1092,19 @@ int ARGBBlend(const uint8* src_argb0, // Alpha Blend plane and store to destination. 
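GetARGBBlend above hands out the same SSSE3/NEON row blender that ARGBBlend selects internally, so callers writing their own row loops get identical dispatch; the BlendPlane diff follows below. For the common case, a sketch of ARGBBlend itself, which (per the libyuv documentation) composites src_argb0 over src_argb1 using src_argb0's per-pixel alpha:

  // dst = fg blended over bg, alpha taken from fg.
  ARGBBlend(fg_argb, width * 4,
            bg_argb, width * 4,
            dst_argb, width * 4,
            width, height);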
LIBYUV_API -int BlendPlane(const uint8* src_y0, +int BlendPlane(const uint8_t* src_y0, int src_stride_y0, - const uint8* src_y1, + const uint8_t* src_y1, int src_stride_y1, - const uint8* alpha, + const uint8_t* alpha, int alpha_stride, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; - void (*BlendPlaneRow)(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width) = + void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1, + const uint8_t* alpha, uint8_t* dst, int width) = BlendPlaneRow_C; if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) { return -1; @@ -1154,36 +1154,36 @@ int BlendPlane(const uint8* src_y0, #define MAXTWIDTH 2048 // Alpha Blend YUV images and store to destination. LIBYUV_API -int I420Blend(const uint8* src_y0, +int I420Blend(const uint8_t* src_y0, int src_stride_y0, - const uint8* src_u0, + const uint8_t* src_u0, int src_stride_u0, - const uint8* src_v0, + const uint8_t* src_v0, int src_stride_v0, - const uint8* src_y1, + const uint8_t* src_y1, int src_stride_y1, - const uint8* src_u1, + const uint8_t* src_u1, int src_stride_u1, - const uint8* src_v1, + const uint8_t* src_v1, int src_stride_v1, - const uint8* alpha, + const uint8_t* alpha, int alpha_stride, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; // Half width/height for UV. int halfwidth = (width + 1) >> 1; - void (*BlendPlaneRow)(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width) = + void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1, + const uint8_t* alpha, uint8_t* dst, int width) = BlendPlaneRow_C; - void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C; + void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C; if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 || !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -1278,17 +1278,17 @@ int I420Blend(const uint8* src_y0, // Multiply 2 ARGB images and store to destination. LIBYUV_API -int ARGBMultiply(const uint8* src_argb0, +int ARGBMultiply(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst, - int width) = ARGBMultiplyRow_C; + void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1, + uint8_t* dst, int width) = ARGBMultiplyRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1350,16 +1350,16 @@ int ARGBMultiply(const uint8* src_argb0, // Add 2 ARGB images and store to destination. 
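In I420Blend above, Y is blended against the full-resolution alpha plane, but U and V are half-size, which is why the function wires up ScaleRowDown2Box: each 2x2 block of alpha is box-averaged before the chroma blend. The reduction in illustrative scalar form (not the library's row code):

  // Rounded 2x2 box average of the alpha plane, used for U/V blending.
  static uint8_t BoxAlpha(const uint8_t* a, int stride, int x, int y) {
    const uint8_t* p = a + (y * 2) * stride + (x * 2);
    return (uint8_t)((p[0] + p[1] + p[stride] + p[stride + 1] + 2) >> 2);
  }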
LIBYUV_API -int ARGBAdd(const uint8* src_argb0, +int ARGBAdd(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst, + void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, int width) = ARGBAddRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1427,17 +1427,17 @@ int ARGBAdd(const uint8* src_argb0, // Subtract 2 ARGB images and store to destination. LIBYUV_API -int ARGBSubtract(const uint8* src_argb0, +int ARGBSubtract(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst, - int width) = ARGBSubtractRow_C; + void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1, + uint8_t* dst, int width) = ARGBSubtractRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1497,20 +1497,20 @@ int ARGBSubtract(const uint8* src_argb0, return 0; } // Convert I422 to RGBA with matrix -static int I422ToRGBAMatrix(const uint8* src_y, +static int I422ToRGBAMatrix(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, const struct YuvConstants* yuvconstants, int width, int height) { int y; - void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { @@ -1567,13 +1567,13 @@ static int I422ToRGBAMatrix(const uint8* src_y, // Convert I422 to RGBA. LIBYUV_API -int I422ToRGBA(const uint8* src_y, +int I422ToRGBA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_rgba, + uint8_t* dst_rgba, int dst_stride_rgba, int width, int height) { @@ -1584,13 +1584,13 @@ int I422ToRGBA(const uint8* src_y, // Convert I422 to BGRA. LIBYUV_API -int I422ToBGRA(const uint8* src_y, +int I422ToBGRA(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_bgra, + uint8_t* dst_bgra, int dst_stride_bgra, int width, int height) { @@ -1603,17 +1603,17 @@ int I422ToBGRA(const uint8* src_y, // Convert NV12 to RGB565. 
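I422ToRGBAMatrix above shows the library's wrapper pattern: a file-static worker parameterized by a YuvConstants table, with public entry points that differ only in the constants they pass (I422ToBGRA, for instance, reuses the same worker with swapped-order constants). Inside planar_functions.cc such a wrapper looks roughly like this, assuming the kYuvI601Constants table from the public headers:

  // BT.601 limited-range wrapper over the static matrix worker.
  LIBYUV_API
  int I422ToRGBA(const uint8_t* src_y, int src_stride_y,
                 const uint8_t* src_u, int src_stride_u,
                 const uint8_t* src_v, int src_stride_v,
                 uint8_t* dst_rgba, int dst_stride_rgba,
                 int width, int height) {
    return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u,
                            src_v, src_stride_v, dst_rgba, dst_stride_rgba,
                            &kYuvI601Constants, width, height);
  }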
LIBYUV_API -int NV12ToRGB565(const uint8* src_y, +int NV12ToRGB565(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_rgb565, + uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; void (*NV12ToRGB565Row)( - const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -1670,14 +1670,14 @@ int NV12ToRGB565(const uint8* src_y, // Convert RAW to RGB24. LIBYUV_API -int RAWToRGB24(const uint8* src_raw, +int RAWToRGB24(const uint8_t* src_raw, int src_stride_raw, - uint8* dst_rgb24, + uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { int y; - void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) = + void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) = RAWToRGB24Row_C; if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) { return -1; @@ -1728,13 +1728,13 @@ int RAWToRGB24(const uint8* src_raw, } LIBYUV_API -void SetPlane(uint8* dst_y, +void SetPlane(uint8_t* dst_y, int dst_stride_y, int width, int height, - uint32 value) { + uint32_t value) { int y; - void (*SetRow)(uint8 * dst, uint8 value, int width) = SetRow_C; + void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -1782,11 +1782,11 @@ void SetPlane(uint8* dst_y, // Draw a rectangle into I420 LIBYUV_API -int I420Rect(uint8* dst_y, +int I420Rect(uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int x, int y, @@ -1797,9 +1797,9 @@ int I420Rect(uint8* dst_y, int value_v) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - uint8* start_y = dst_y + y * dst_stride_y + x; - uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); - uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); + uint8_t* start_y = dst_y + y * dst_stride_y + x; + uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); + uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 || y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 || value_v < 0 || value_v > 255) { @@ -1814,15 +1814,16 @@ int I420Rect(uint8* dst_y, // Draw a rectangle into ARGB LIBYUV_API -int ARGBRect(uint8* dst_argb, +int ARGBRect(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height, - uint32 value) { + uint32_t value) { int y; - void (*ARGBSetRow)(uint8 * dst_argb, uint32 value, int width) = ARGBSetRow_C; + void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) = + ARGBSetRow_C; if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { return -1; } @@ -1883,15 +1884,15 @@ int ARGBRect(uint8* dst_argb, // f is foreground pixel premultiplied by alpha LIBYUV_API -int ARGBAttenuate(const uint8* src_argb, +int ARGBAttenuate(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) = - ARGBAttenuateRow_C; + void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBAttenuateRow_C; if (!src_argb || 
!dst_argb || width <= 0 || height == 0) { return -1; } @@ -1949,14 +1950,14 @@ int ARGBAttenuate(const uint8* src_argb, // Convert preattentuated ARGB to unattenuated ARGB. LIBYUV_API -int ARGBUnattenuate(const uint8* src_argb, +int ARGBUnattenuate(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, + void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBUnattenuateRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2000,14 +2001,14 @@ int ARGBUnattenuate(const uint8* src_argb, // Convert ARGB to Grayed ARGB. LIBYUV_API -int ARGBGrayTo(const uint8* src_argb, +int ARGBGrayTo(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) = + void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBGrayRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2049,16 +2050,16 @@ int ARGBGrayTo(const uint8* src_argb, // Make a rectangle of ARGB gray scale. LIBYUV_API -int ARGBGray(uint8* dst_argb, +int ARGBGray(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height) { int y; - void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) = + void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBGrayRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; + uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } @@ -2093,15 +2094,15 @@ int ARGBGray(uint8* dst_argb, // Make a rectangle of ARGB Sepia tone. LIBYUV_API -int ARGBSepia(uint8* dst_argb, +int ARGBSepia(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height) { int y; - void (*ARGBSepiaRow)(uint8 * dst_argb, int width) = ARGBSepiaRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; + void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C; + uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } @@ -2137,16 +2138,16 @@ int ARGBSepia(uint8* dst_argb, // Apply a 4x4 matrix to each ARGB pixel. // Note: Normally for shading, but can be used to swizzle or invert. LIBYUV_API -int ARGBColorMatrix(const uint8* src_argb, +int ARGBColorMatrix(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - const int8* matrix_argb, + const int8_t* matrix_argb, int width, int height) { int y; - void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) = + void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb, + const int8_t* matrix_argb, int width) = ARGBColorMatrixRow_C; if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { return -1; @@ -2188,15 +2189,15 @@ int ARGBColorMatrix(const uint8* src_argb, // Apply a 4x3 matrix to each ARGB pixel. // Deprecated. 
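// The color-matrix coefficients are signed fixed point with 64 == 1.0, as the
// constants set up in RGBColorMatrix below show (matrix_argb[15] = 64 for a
// unit alpha term). A sketch of the non-deprecated ARGBColorMatrix path with
// an identity matrix; buffers and dimensions are assumptions.
static void ColorMatrixSketch() {
  enum { kW = 32, kH = 32 };
  static uint8_t src[kW * 4 * kH], dst[kW * 4 * kH];
  // Each row of 4 coefficients produces one output channel, in B, G, R, A
  // order, as a dot product with the (B, G, R, A) input channels.
  static const int8_t kIdentity[16] = {64, 0,  0,  0,    // B' = B
                                       0,  64, 0,  0,    // G' = G
                                       0,  0,  64, 0,    // R' = R
                                       0,  0,  0,  64};  // A' = A
  libyuv::ARGBColorMatrix(src, kW * 4, dst, kW * 4, kIdentity, kW, kH);
}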
LIBYUV_API -int RGBColorMatrix(uint8* dst_argb, +int RGBColorMatrix(uint8_t* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, + const int8_t* matrix_rgb, int dst_x, int dst_y, int width, int height) { - SIMD_ALIGNED(int8 matrix_argb[16]); - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; + SIMD_ALIGNED(int8_t matrix_argb[16]); + uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -2218,24 +2219,24 @@ int RGBColorMatrix(uint8* dst_argb, matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0; matrix_argb[15] = 64; // 1.0 - return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, dst, + return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst, dst_stride_argb, &matrix_argb[0], width, height); } // Apply a color table each ARGB pixel. // Table contains 256 ARGB values. LIBYUV_API -int ARGBColorTable(uint8* dst_argb, +int ARGBColorTable(uint8_t* dst_argb, int dst_stride_argb, - const uint8* table_argb, + const uint8_t* table_argb, int dst_x, int dst_y, int width, int height) { int y; - void (*ARGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb, + void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb, int width) = ARGBColorTableRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; + uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -2261,17 +2262,17 @@ int ARGBColorTable(uint8* dst_argb, // Apply a color table each ARGB pixel but preserve destination alpha. // Table contains 256 ARGB values. LIBYUV_API -int RGBColorTable(uint8* dst_argb, +int RGBColorTable(uint8_t* dst_argb, int dst_stride_argb, - const uint8* table_argb, + const uint8_t* table_argb, int dst_x, int dst_y, int width, int height) { int y; - void (*RGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb, + void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb, int width) = RGBColorTableRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; + uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -2304,7 +2305,7 @@ int RGBColorTable(uint8* dst_argb, // Caveat - although SSE2 saturates, the C function does not and should be used // with care if doing anything but quantization. LIBYUV_API -int ARGBQuantize(uint8* dst_argb, +int ARGBQuantize(uint8_t* dst_argb, int dst_stride_argb, int scale, int interval_size, @@ -2314,9 +2315,9 @@ int ARGBQuantize(uint8* dst_argb, int width, int height) { int y; - void (*ARGBQuantizeRow)(uint8 * dst_argb, int scale, int interval_size, + void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size, int interval_offset, int width) = ARGBQuantizeRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; + uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || interval_size < 1 || interval_size > 255) { return -1; @@ -2352,17 +2353,17 @@ int ARGBQuantize(uint8* dst_argb, // Computes table of cumulative sum for image where the value is the sum // of all values above and to the left of the entry. Used by ARGBBlur. 
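// The cumulative-sum table is a summed-area table: with C(x, y) holding the
// sum of everything above and to the left, any box sum takes four lookups,
// S = C(x1, y1) - C(x0, y1) - C(x1, y0) + C(x0, y0), which is what lets
// ARGBBlur average a box around each pixel in constant time per pixel. A
// hypothetical call, sizing the scratch table per the circular-buffer note in
// ARGBBlur below; names and sizes are assumptions.
static void BlurSketch() {
  enum { kW = 128, kH = 128, kRadius = 5 };
  static uint8_t src[kW * 4 * kH], dst[kW * 4 * kH];
  // 4 int32 sums per pixel; radius * 2 + 2 rows suffice (buffer is circular).
  static int32_t cumsum[kW * 4 * (kRadius * 2 + 2)];
  libyuv::ARGBBlur(src, kW * 4, dst, kW * 4, cumsum, kW * 4, kW, kH, kRadius);
}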
LIBYUV_API -int ARGBComputeCumulativeSum(const uint8* src_argb, +int ARGBComputeCumulativeSum(const uint8_t* src_argb, int src_stride_argb, - int32* dst_cumsum, + int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height) { int y; - void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) = + void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum, + const int32_t* previous_cumsum, int width) = ComputeCumulativeSumRow_C; - int32* previous_cumsum = dst_cumsum; + int32_t* previous_cumsum = dst_cumsum; if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { return -1; } @@ -2386,25 +2387,25 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory // as the buffer is treated as circular. LIBYUV_API -int ARGBBlur(const uint8* src_argb, +int ARGBBlur(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - int32* dst_cumsum, + int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height, int radius) { int y; - void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) = + void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum, + const int32_t* previous_cumsum, int width) = ComputeCumulativeSumRow_C; - void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, - int count) = CumulativeSumToAverageRow_C; - int32* cumsum_bot_row; - int32* max_cumsum_bot_row; - int32* cumsum_top_row; + void (*CumulativeSumToAverageRow)( + const int32_t* topleft, const int32_t* botleft, int width, int area, + uint8_t* dst, int count) = CumulativeSumToAverageRow_C; + int32_t* cumsum_bot_row; + int32_t* max_cumsum_bot_row; + int32_t* cumsum_top_row; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2458,7 +2459,7 @@ int ARGBBlur(const uint8* src_argb, // Increment cumsum_bot_row pointer with circular buffer wrap around and // then fill in a row of CumulativeSum. if ((y + radius) < height) { - const int32* prev_cumsum_bot_row = cumsum_bot_row; + const int32_t* prev_cumsum_bot_row = cumsum_bot_row; cumsum_bot_row += dst_stride32_cumsum; if (cumsum_bot_row >= max_cumsum_bot_row) { cumsum_bot_row = dst_cumsum; @@ -2496,16 +2497,16 @@ int ARGBBlur(const uint8* src_argb, // Multiply ARGB image by a specified ARGB value. LIBYUV_API -int ARGBShade(const uint8* src_argb, +int ARGBShade(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, - uint32 value) { + uint32_t value) { int y; - void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) = ARGBShadeRow_C; + void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width, + uint32_t value) = ARGBShadeRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { return -1; } @@ -2546,17 +2547,17 @@ int ARGBShade(const uint8* src_argb, // Interpolate 2 planes by specified amount (0 to 255). 
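// InterpolatePlane is a crossfade: a fraction of 0 returns src0 and 255 is
// (nearly) src1, computed per pixel as roughly src0 + ((src1 - src0) *
// fraction >> 8). A sketch of a half-way dissolve between two luma planes;
// buffer names and sizes are assumptions.
static void DissolveSketch() {
  enum { kW = 64, kH = 64 };
  static uint8_t frame_a[kW * kH], frame_b[kW * kH], out[kW * kH];
  libyuv::InterpolatePlane(frame_a, kW, frame_b, kW, out, kW, kW, kH, 128);
}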
LIBYUV_API -int InterpolatePlane(const uint8* src0, +int InterpolatePlane(const uint8_t* src0, int src_stride0, - const uint8* src1, + const uint8_t* src1, int src_stride1, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height, int interpolation) { int y; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src0 || !src1 || !dst || width <= 0 || height == 0) { @@ -2618,11 +2619,11 @@ int InterpolatePlane(const uint8* src0, // Interpolate 2 ARGB images by specified amount (0 to 255). LIBYUV_API -int ARGBInterpolate(const uint8* src_argb0, +int ARGBInterpolate(const uint8_t* src_argb0, int src_stride_argb0, - const uint8* src_argb1, + const uint8_t* src_argb1, int src_stride_argb1, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, @@ -2634,23 +2635,23 @@ int ARGBInterpolate(const uint8* src_argb0, // Interpolate 2 YUV images by specified amount (0 to 255). LIBYUV_API -int I420Interpolate(const uint8* src0_y, +int I420Interpolate(const uint8_t* src0_y, int src0_stride_y, - const uint8* src0_u, + const uint8_t* src0_u, int src0_stride_u, - const uint8* src0_v, + const uint8_t* src0_v, int src0_stride_v, - const uint8* src1_y, + const uint8_t* src1_y, int src1_stride_y, - const uint8* src1_u, + const uint8_t* src1_u, int src1_stride_u, - const uint8* src1_v, + const uint8_t* src1_v, int src1_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height, @@ -2672,16 +2673,16 @@ int I420Interpolate(const uint8* src0_y, // Shuffle ARGB channel order. e.g. BGRA to ARGB. LIBYUV_API -int ARGBShuffle(const uint8* src_bgra, +int ARGBShuffle(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - const uint8* shuffler, + const uint8_t* shuffler, int width, int height) { int y; - void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, - const uint8* shuffler, int width) = ARGBShuffleRow_C; + void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb, + const uint8_t* shuffler, int width) = ARGBShuffleRow_C; if (!src_bgra || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -2739,23 +2740,23 @@ int ARGBShuffle(const uint8* src_bgra, } // Sobel ARGB effect. 
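// ARGBSobelize below factors the shared work: each row is converted to
// JPEG-range luma with ARGBToYJRow, a three-row circular window is kept, and
// the SobelX/SobelY row results are handed to the supplied SobelRow. For
// plain ARGBSobel the output pixel is, in effect, the clamped gradient
// magnitude written as a gray ARGB pixel. A hypothetical call with assumed
// buffers:
static void SobelSketch() {
  enum { kW = 64, kH = 64 };
  static uint8_t src[kW * 4 * kH], edges[kW * 4 * kH];
  libyuv::ARGBSobel(src, kW * 4, edges, kW * 4, kW, kH);
}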
-static int ARGBSobelize(const uint8* src_argb, +static int ARGBSobelize(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, - void (*SobelRow)(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst, + void (*SobelRow)(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst, int width)) { int y; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) = + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) = ARGBToYJRow_C; - void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, - int width) = SobelYRow_C; - void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobely, int width) = + void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1, + uint8_t* dst_sobely, int width) = SobelYRow_C; + void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1, + const uint8_t* src_y2, uint8_t* dst_sobely, int width) = SobelXRow_C; const int kEdge = 16; // Extra pixels at start of row for extrude/align. if (!src_argb || !dst_argb || width <= 0 || height == 0) { @@ -2835,14 +2836,14 @@ static int ARGBSobelize(const uint8* src_argb, // 3 rows with edges before/after. const int kRowSize = (width + kEdge + 31) & ~31; align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); - uint8* row_sobelx = rows; - uint8* row_sobely = rows + kRowSize; - uint8* row_y = rows + kRowSize * 2; + uint8_t* row_sobelx = rows; + uint8_t* row_sobely = rows + kRowSize; + uint8_t* row_y = rows + kRowSize * 2; // Convert first row. - uint8* row_y0 = row_y + kEdge; - uint8* row_y1 = row_y0 + kRowSize; - uint8* row_y2 = row_y1 + kRowSize; + uint8_t* row_y0 = row_y + kEdge; + uint8_t* row_y1 = row_y0 + kRowSize; + uint8_t* row_y2 = row_y1 + kRowSize; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. @@ -2866,7 +2867,7 @@ static int ARGBSobelize(const uint8* src_argb, // Cycle thru circular queue of 3 row_y buffers. { - uint8* row_yt = row_y0; + uint8_t* row_yt = row_y0; row_y0 = row_y1; row_y1 = row_y2; row_y2 = row_yt; @@ -2881,14 +2882,14 @@ static int ARGBSobelize(const uint8* src_argb, // Sobel ARGB effect. LIBYUV_API -int ARGBSobel(const uint8* src_argb, +int ARGBSobel(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { - void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) = SobelRow_C; + void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, + uint8_t* dst_argb, int width) = SobelRow_C; #if defined(HAS_SOBELROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelRow = SobelRow_Any_SSE2; @@ -2919,14 +2920,14 @@ int ARGBSobel(const uint8* src_argb, // Sobel ARGB effect with planar output. 
LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, +int ARGBSobelToPlane(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, int width, int height) { - void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_, int width) = SobelToPlaneRow_C; + void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, + uint8_t* dst_, int width) = SobelToPlaneRow_C; #if defined(HAS_SOBELTOPLANEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelToPlaneRow = SobelToPlaneRow_Any_SSE2; @@ -2958,14 +2959,14 @@ int ARGBSobelToPlane(const uint8* src_argb, // SobelXY ARGB effect. // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, +int ARGBSobelXY(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { - void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) = SobelXYRow_C; + void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, + uint8_t* dst_argb, int width) = SobelXYRow_C; #if defined(HAS_SOBELXYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelXYRow = SobelXYRow_Any_SSE2; @@ -2996,15 +2997,15 @@ int ARGBSobelXY(const uint8* src_argb, // Apply a 4x4 polynomial to each ARGB pixel. LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, +int ARGBPolynomial(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, const float* poly, int width, int height) { int y; - void (*ARGBPolynomialRow)(const uint8* src_argb, uint8* dst_argb, + void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) = ARGBPolynomialRow_C; if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { return -1; @@ -3044,16 +3045,16 @@ int ARGBPolynomial(const uint8* src_argb, // Convert plane of 16 bit shorts to half floats. // Source values are multiplied by scale before storing as half float. LIBYUV_API -int HalfFloatPlane(const uint16* src_y, +int HalfFloatPlane(const uint16_t* src_y, int src_stride_y, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, float scale, int width, int height) { int y; - void (*HalfFloatRow)(const uint16* src, uint16* dst, float scale, int width) = - HalfFloatRow_C; + void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale, + int width) = HalfFloatRow_C; if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } @@ -3124,17 +3125,17 @@ int HalfFloatPlane(const uint16* src_y, // Apply a lumacolortable to each ARGB pixel. LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, +int ARGBLumaColorTable(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - const uint8* luma, + const uint8_t* luma, int width, int height) { int y; void (*ARGBLumaColorTableRow)( - const uint8* src_argb, uint8* dst_argb, int width, const uint8* luma, - const uint32 lumacoeff) = ARGBLumaColorTableRow_C; + const uint8_t* src_argb, uint8_t* dst_argb, int width, + const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C; if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { return -1; } @@ -3166,15 +3167,15 @@ int ARGBLumaColorTable(const uint8* src_argb, // Copy Alpha from one ARGB image to another. 
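// The alpha helpers in this hunk complement each other: ARGBCopyAlpha (below)
// copies the A channel between ARGB images, ARGBExtractAlpha pulls A out into
// a planar buffer, and ARGBCopyYToAlpha writes a plane back into A. A
// round-trip sketch with assumed buffers:
static void AlphaPlaneSketch() {
  enum { kW = 64, kH = 64 };
  static uint8_t argb[kW * 4 * kH], a_plane[kW * kH];
  libyuv::ARGBExtractAlpha(argb, kW * 4, a_plane, kW, kW, kH);  // A -> plane
  libyuv::ARGBCopyYToAlpha(a_plane, kW, argb, kW * 4, kW, kH);  // plane -> A
}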
LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, +int ARGBCopyAlpha(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = - ARGBCopyAlphaRow_C; + void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBCopyAlphaRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3217,10 +3218,10 @@ int ARGBCopyAlpha(const uint8* src_argb, // Extract just the alpha channel from ARGB. LIBYUV_API -int ARGBExtractAlpha(const uint8* src_argb, - int src_stride, - uint8* dst_a, - int dst_stride, +int ARGBExtractAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_a, + int dst_stride_a, int width, int height) { if (!src_argb || !dst_a || width <= 0 || height == 0) { @@ -3229,17 +3230,17 @@ int ARGBExtractAlpha(const uint8* src_argb, // Negative height means invert the image. if (height < 0) { height = -height; - src_argb += (height - 1) * src_stride; - src_stride = -src_stride; + src_argb += (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride == width * 4 && dst_stride == width) { + if (src_stride_argb == width * 4 && dst_stride_a == width) { width *= height; height = 1; - src_stride = dst_stride = 0; + src_stride_argb = dst_stride_a = 0; } - void (*ARGBExtractAlphaRow)(const uint8* src_argb, uint8* dst_a, int width) = - ARGBExtractAlphaRow_C; + void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, + int width) = ARGBExtractAlphaRow_C; #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2 @@ -3267,23 +3268,23 @@ int ARGBExtractAlpha(const uint8* src_argb, for (int y = 0; y < height; ++y) { ARGBExtractAlphaRow(src_argb, dst_a, width); - src_argb += src_stride; - dst_a += dst_stride; + src_argb += src_stride_argb; + dst_a += dst_stride_a; } return 0; } // Copy a planar Y channel to the alpha channel of a destination ARGB image. LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, +int ARGBCopyYToAlpha(const uint8_t* src_y, int src_stride_y, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = - ARGBCopyYToAlphaRow_C; + void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb, + int width) = ARGBCopyYToAlphaRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3328,19 +3329,19 @@ int ARGBCopyYToAlpha(const uint8* src_y, // directly. A SplitUVRow_Odd function could copy the remaining chroma. 
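// YUY2 packs {Y0, U, Y1, V} per pixel pair at 2 bytes per pixel; NV12 splits
// that into a full-size Y plane and a half-height interleaved UV plane, which
// is why YUY2ToNV12 below runs InterpolateRow to average chroma from two
// source rows. A hypothetical conversion (even width and height assumed):
static void Yuy2ToNv12Sketch() {
  enum { kW = 320, kH = 240 };
  static uint8_t yuy2[kW * 2 * kH];
  static uint8_t y_plane[kW * kH], uv_plane[kW * (kH / 2)];
  libyuv::YUY2ToNV12(yuy2, kW * 2, y_plane, kW, uv_plane, kW, kW, kH);
}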
LIBYUV_API -int YUY2ToNV12(const uint8* src_yuy2, +int YUY2ToNV12(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { @@ -3444,19 +3445,19 @@ int YUY2ToNV12(const uint8* src_yuy2, } LIBYUV_API -int UYVYToNV12(const uint8* src_uyvy, +int UYVYToNV12(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_uv, + uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) { diff --git a/chromium/third_party/libyuv/source/rotate.cc b/chromium/third_party/libyuv/source/rotate.cc index 1f74cd0714a..f2bed85b755 100644 --- a/chromium/third_party/libyuv/source/rotate.cc +++ b/chromium/third_party/libyuv/source/rotate.cc @@ -22,18 +22,18 @@ extern "C" { #endif LIBYUV_API -void TransposePlane(const uint8* src, +void TransposePlane(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { int i = height; #if defined(HAS_TRANSPOSEWX16_MSA) - void (*TransposeWx16)(const uint8* src, int src_stride, uint8* dst, + void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) = TransposeWx16_C; #else - void (*TransposeWx8)(const uint8* src, int src_stride, uint8* dst, + void (*TransposeWx8)(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) = TransposeWx8_C; #endif #if defined(HAS_TRANSPOSEWX8_NEON) @@ -90,9 +90,9 @@ void TransposePlane(const uint8* src, } LIBYUV_API -void RotatePlane90(const uint8* src, +void RotatePlane90(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { @@ -105,9 +105,9 @@ void RotatePlane90(const uint8* src, } LIBYUV_API -void RotatePlane270(const uint8* src, +void RotatePlane270(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { @@ -120,20 +120,20 @@ void RotatePlane270(const uint8* src, } LIBYUV_API -void RotatePlane180(const uint8* src, +void RotatePlane180(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { // Swap first and last row and mirror the content. Uses a temporary row. 
align_buffer_64(row, width); - const uint8* src_bot = src + src_stride * (height - 1); - uint8* dst_bot = dst + dst_stride * (height - 1); + const uint8_t* src_bot = src + src_stride * (height - 1); + uint8_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; - void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; + void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; + void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; #if defined(HAS_MIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MirrorRow = MirrorRow_Any_NEON; @@ -201,22 +201,22 @@ void RotatePlane180(const uint8* src, } LIBYUV_API -void TransposeUV(const uint8* src, +void TransposeUV(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height) { int i = height; #if defined(HAS_TRANSPOSEUVWX16_MSA) - void (*TransposeUVWx16)(const uint8* src, int src_stride, uint8* dst_a, - int dst_stride_a, uint8* dst_b, int dst_stride_b, + void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a, + int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) = TransposeUVWx16_C; #else - void (*TransposeUVWx8)(const uint8* src, int src_stride, uint8* dst_a, - int dst_stride_a, uint8* dst_b, int dst_stride_b, + void (*TransposeUVWx8)(const uint8_t* src, int src_stride, uint8_t* dst_a, + int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) = TransposeUVWx8_C; #endif #if defined(HAS_TRANSPOSEUVWX8_NEON) @@ -270,11 +270,11 @@ void TransposeUV(const uint8* src, } LIBYUV_API -void RotateUV90(const uint8* src, +void RotateUV90(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height) { @@ -286,11 +286,11 @@ void RotateUV90(const uint8* src, } LIBYUV_API -void RotateUV270(const uint8* src, +void RotateUV270(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height) { @@ -305,17 +305,17 @@ void RotateUV270(const uint8* src, // Rotate 180 is a horizontal and vertical flip. 
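// Rotation by 90 or 270 degrees is a transpose plus a mirror, and 180 is the
// row-swap-and-mirror loop above; RotateUV180 below does the same for
// interleaved chroma. The public entry points select the angle with
// enum RotationMode (kRotate0/90/180/270 from rotate.h). After a 90-degree
// turn the destination is height pixels wide, so its strides are based on
// the source height. A sketch with assumed I420 buffers:
static void RotateSketch() {
  enum { kW = 64, kH = 48 };
  static uint8_t sy[kW * kH], su[(kW / 2) * (kH / 2)], sv[(kW / 2) * (kH / 2)];
  static uint8_t dy[kH * kW], du[(kH / 2) * (kW / 2)], dv[(kH / 2) * (kW / 2)];
  libyuv::I420Rotate(sy, kW, su, kW / 2, sv, kW / 2, dy, kH, du, kH / 2, dv,
                     kH / 2, kW, kH, libyuv::kRotate90);
}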
LIBYUV_API -void RotateUV180(const uint8* src, +void RotateUV180(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height) { int i; - void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = - MirrorUVRow_C; + void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v, + int width) = MirrorUVRow_C; #if defined(HAS_MIRRORUVROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { MirrorUVRow = MirrorUVRow_NEON; @@ -344,9 +344,9 @@ void RotateUV180(const uint8* src, } LIBYUV_API -int RotatePlane(const uint8* src, +int RotatePlane(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height, @@ -383,17 +383,17 @@ int RotatePlane(const uint8* src, } LIBYUV_API -int I420Rotate(const uint8* src_y, +int I420Rotate(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height, @@ -451,15 +451,15 @@ int I420Rotate(const uint8* src_y, } LIBYUV_API -int NV12ToI420Rotate(const uint8* src_y, +int NV12ToI420Rotate(const uint8_t* src_y, int src_stride_y, - const uint8* src_uv, + const uint8_t* src_uv, int src_stride_uv, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int width, int height, diff --git a/chromium/third_party/libyuv/source/rotate_any.cc b/chromium/third_party/libyuv/source/rotate_any.cc index eb4f7418660..c2752e6222c 100644 --- a/chromium/third_party/libyuv/source/rotate_any.cc +++ b/chromium/third_party/libyuv/source/rotate_any.cc @@ -19,8 +19,8 @@ extern "C" { #endif #define TANY(NAMEANY, TPOS_SIMD, MASK) \ - void NAMEANY(const uint8* src, int src_stride, uint8* dst, int dst_stride, \ - int width) { \ + void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \ + int dst_stride, int width) { \ int r = width & MASK; \ int n = width - r; \ if (n > 0) { \ @@ -44,8 +44,9 @@ TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15) #undef TANY #define TUVANY(NAMEANY, TPOS_SIMD, MASK) \ - void NAMEANY(const uint8* src, int src_stride, uint8* dst_a, \ - int dst_stride_a, uint8* dst_b, int dst_stride_b, int width) { \ + void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \ + int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \ + int width) { \ int r = width & MASK; \ int n = width - r; \ if (n > 0) { \ diff --git a/chromium/third_party/libyuv/source/rotate_argb.cc b/chromium/third_party/libyuv/source/rotate_argb.cc index f6a2bf69f94..f13b041f88b 100644 --- a/chromium/third_party/libyuv/source/rotate_argb.cc +++ b/chromium/third_party/libyuv/source/rotate_argb.cc @@ -22,43 +22,41 @@ extern "C" { // ARGBScale has a function to copy pixels to a row, striding each source // pixel by a constant. 
-#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || \ - (defined(__x86_64__) && !defined(__native_client__)) || \ - defined(__i386__)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #define HAS_SCALEARGBROWDOWNEVEN_SSE2 -void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, +void ScaleARGBRowDownEven_SSE2(const uint8_t* src_ptr, int src_stride, int src_stepx, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); #endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SCALEARGBROWDOWNEVEN_NEON -void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, +void ScaleARGBRowDownEven_NEON(const uint8_t* src_ptr, int src_stride, int src_stepx, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); #endif -void ScaleARGBRowDownEven_C(const uint8* src_ptr, +void ScaleARGBRowDownEven_C(const uint8_t* src_ptr, int, int src_stepx, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width); -static void ARGBTranspose(const uint8* src, +static void ARGBTranspose(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { int i; int src_pixel_step = src_stride >> 2; - void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride, - int src_step, uint8* dst_ptr, int dst_width) = + void (*ScaleARGBRowDownEven)(const uint8_t* src_ptr, int src_stride, + int src_step, uint8_t* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest. @@ -78,9 +76,9 @@ static void ARGBTranspose(const uint8* src, } } -void ARGBRotate90(const uint8* src, +void ARGBRotate90(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { @@ -92,9 +90,9 @@ void ARGBRotate90(const uint8* src, ARGBTranspose(src, src_stride, dst, dst_stride, width, height); } -void ARGBRotate270(const uint8* src, +void ARGBRotate270(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { @@ -106,21 +104,21 @@ void ARGBRotate270(const uint8* src, ARGBTranspose(src, src_stride, dst, dst_stride, width, height); } -void ARGBRotate180(const uint8* src, +void ARGBRotate180(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { // Swap first and last row and mirror the content. Uses a temporary row. 
align_buffer_64(row, width * 4); - const uint8* src_bot = src + src_stride * (height - 1); - uint8* dst_bot = dst + dst_stride * (height - 1); + const uint8_t* src_bot = src + src_stride * (height - 1); + uint8_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; - void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = + void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) = ARGBMirrorRow_C; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; + void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; @@ -188,9 +186,9 @@ void ARGBRotate180(const uint8* src, } LIBYUV_API -int ARGBRotate(const uint8* src_argb, +int ARGBRotate(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int width, int height, diff --git a/chromium/third_party/libyuv/source/rotate_common.cc b/chromium/third_party/libyuv/source/rotate_common.cc index 89357e732d2..ff212adebc4 100644 --- a/chromium/third_party/libyuv/source/rotate_common.cc +++ b/chromium/third_party/libyuv/source/rotate_common.cc @@ -16,9 +16,9 @@ namespace libyuv { extern "C" { #endif -void TransposeWx8_C(const uint8* src, +void TransposeWx8_C(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { int i; @@ -36,11 +36,11 @@ void TransposeWx8_C(const uint8* src, } } -void TransposeUVWx8_C(const uint8* src, +void TransposeUVWx8_C(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width) { int i; @@ -67,9 +67,9 @@ void TransposeUVWx8_C(const uint8* src, } } -void TransposeWxH_C(const uint8* src, +void TransposeWxH_C(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width, int height) { @@ -82,11 +82,11 @@ void TransposeWxH_C(const uint8* src, } } -void TransposeUVWxH_C(const uint8* src, +void TransposeUVWxH_C(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width, int height) { diff --git a/chromium/third_party/libyuv/source/rotate_gcc.cc b/chromium/third_party/libyuv/source/rotate_gcc.cc index 74b48ac4084..04e19e29eef 100644 --- a/chromium/third_party/libyuv/source/rotate_gcc.cc +++ b/chromium/third_party/libyuv/source/rotate_gcc.cc @@ -22,9 +22,9 @@ extern "C" { // Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit. #if defined(HAS_TRANSPOSEWX8_SSSE3) -void TransposeWx8_SSSE3(const uint8* src, +void TransposeWx8_SSSE3(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { asm volatile( @@ -112,9 +112,9 @@ void TransposeWx8_SSSE3(const uint8* src, // Transpose 16x8. 64 bit #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3) -void TransposeWx8_Fast_SSSE3(const uint8* src, +void TransposeWx8_Fast_SSSE3(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { asm volatile( @@ -255,11 +255,11 @@ void TransposeWx8_Fast_SSSE3(const uint8* src, // Transpose UV 8x8. 64 bit. 
#if defined(HAS_TRANSPOSEUVWX8_SSE2) -void TransposeUVWx8_SSE2(const uint8* src, +void TransposeUVWx8_SSE2(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width) { asm volatile( diff --git a/chromium/third_party/libyuv/source/rotate_msa.cc b/chromium/third_party/libyuv/source/rotate_msa.cc index 8907765aba7..99bdca65b32 100644 --- a/chromium/third_party/libyuv/source/rotate_msa.cc +++ b/chromium/third_party/libyuv/source/rotate_msa.cc @@ -51,9 +51,9 @@ extern "C" { out3 = (v16u8)__msa_ilvl_d((v2i64)in3, (v2i64)in2); \ } -void TransposeWx16_C(const uint8* src, +void TransposeWx16_C(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { TransposeWx8_C(src, src_stride, dst, dst_stride, width); @@ -61,11 +61,11 @@ void TransposeWx16_C(const uint8* src, width); } -void TransposeUVWx16_C(const uint8* src, +void TransposeUVWx16_C(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width) { TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, @@ -74,13 +74,13 @@ void TransposeUVWx16_C(const uint8* src, dst_stride_a, (dst_b + 8), dst_stride_b, width); } -void TransposeWx16_MSA(const uint8* src, +void TransposeWx16_MSA(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { int x; - const uint8* s; + const uint8_t* s; v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; @@ -153,15 +153,15 @@ void TransposeWx16_MSA(const uint8* src, } } -void TransposeUVWx16_MSA(const uint8* src, +void TransposeUVWx16_MSA(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width) { int x; - const uint8* s; + const uint8_t* s; v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; diff --git a/chromium/third_party/libyuv/source/rotate_neon.cc b/chromium/third_party/libyuv/source/rotate_neon.cc index 47ff9b29ef5..fdc0dd476c6 100644 --- a/chromium/third_party/libyuv/source/rotate_neon.cc +++ b/chromium/third_party/libyuv/source/rotate_neon.cc @@ -24,12 +24,12 @@ extern "C" { static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; -void TransposeWx8_NEON(const uint8* src, +void TransposeWx8_NEON(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { - const uint8* src_temp; + const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter @@ -192,14 +192,14 @@ void TransposeWx8_NEON(const uint8* src, static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; -void TransposeUVWx8_NEON(const uint8* src, +void TransposeUVWx8_NEON(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width) { - const uint8* src_temp; + const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. 
starting the counter diff --git a/chromium/third_party/libyuv/source/rotate_neon64.cc b/chromium/third_party/libyuv/source/rotate_neon64.cc index 93c30546bd2..f469baacf68 100644 --- a/chromium/third_party/libyuv/source/rotate_neon64.cc +++ b/chromium/third_party/libyuv/source/rotate_neon64.cc @@ -24,12 +24,12 @@ extern "C" { static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; -void TransposeWx8_NEON(const uint8* src, +void TransposeWx8_NEON(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { - const uint8* src_temp; + const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter @@ -196,18 +196,18 @@ void TransposeWx8_NEON(const uint8* src, "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } -static const uint8 kVTbl4x4TransposeDi[32] = { +static const uint8_t kVTbl4x4TransposeDi[32] = { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54, 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55}; -void TransposeUVWx8_NEON(const uint8* src, +void TransposeUVWx8_NEON(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int width) { - const uint8* src_temp; + const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter diff --git a/chromium/third_party/libyuv/source/rotate_win.cc b/chromium/third_party/libyuv/source/rotate_win.cc index fb052f65212..e887dd525c7 100644 --- a/chromium/third_party/libyuv/source/rotate_win.cc +++ b/chromium/third_party/libyuv/source/rotate_win.cc @@ -19,9 +19,9 @@ extern "C" { // This module is for 32 bit Visual C x86 and clangcl #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) -__declspec(naked) void TransposeWx8_SSSE3(const uint8* src, +__declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src, int src_stride, - uint8* dst, + uint8_t* dst, int dst_stride, int width) { __asm { @@ -112,11 +112,11 @@ __declspec(naked) void TransposeWx8_SSSE3(const uint8* src, } } -__declspec(naked) void TransposeUVWx8_SSE2(const uint8* src, +__declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src, int src_stride, - uint8* dst_a, + uint8_t* dst_a, int dst_stride_a, - uint8* dst_b, + uint8_t* dst_b, int dst_stride_b, int w) { __asm { diff --git a/chromium/third_party/libyuv/source/row_any.cc b/chromium/third_party/libyuv/source/row_any.cc index 7e557d42109..9343992b1e9 100644 --- a/chromium/third_party/libyuv/source/row_any.cc +++ b/chromium/third_party/libyuv/source/row_any.cc @@ -31,25 +31,25 @@ extern "C" { #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) // Any 4 planes to 1 with yuvconstants -#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - const uint8* a_buf, uint8* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 5]); \ - memset(temp, 0, 64 * 4); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 192, a_buf + n, r); \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 
192, temp + 256, \ - yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ - SS(r, DUVSHIFT) * BPP); \ +#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 5]); \ + memset(temp, 0, 64 * 4); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r); \ + memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 192, a_buf + n, r); \ + ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \ + yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ + SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_I422ALPHATOARGBROW_SSSE3 @@ -67,22 +67,22 @@ ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7) #undef ANY41C // Any 3 planes to 1. -#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 4]); \ - memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ - SS(r, DUVSHIFT) * BPP); \ +#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 4]); \ + memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ + } \ + memcpy(temp, y_buf + n, r); \ + memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ + SS(r, DUVSHIFT) * BPP); \ } // Merge functions. @@ -96,6 +96,10 @@ ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15) ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) #endif +#ifdef HAS_I422TOYUY2ROW_AVX2 +ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31) +ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31) +#endif #ifdef HAS_I422TOYUY2ROW_NEON ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) #endif @@ -120,10 +124,10 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) // on arm that subsamples 444 to 422 internally. 
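// Every ANY* macro in this file expands to the same three-step wrapper: run
// the SIMD row function over the largest multiple of (MASK + 1) pixels, copy
// the r leftover pixels into a zeroed aligned temp row, run one full vector
// over that padding, and copy only the r valid results back out. Hand-expanded
// below for a hypothetical 1:1 row function with a 16-pixel SIMD width;
// MyRow_SIMD is a stand-in name, and <string.h> plus libyuv/row.h (for
// SIMD_ALIGNED) are assumed.
static void MyRow_Any(const uint8_t* src, uint8_t* dst, int width) {
  SIMD_ALIGNED(uint8_t temp[128 * 2]);
  memset(temp, 0, 128);              // zero the pad so msan sees no junk
  int r = width & 15;                // leftover pixels past the vector body
  int n = width & ~15;               // largest multiple-of-16 prefix
  if (n > 0) {
    MyRow_SIMD(src, dst, n);         // fast path over the full-width body
  }
  memcpy(temp, src + n, r);          // stage the tail into the temp row
  MyRow_SIMD(temp, temp + 128, 16);  // one full vector over the padding
  memcpy(dst + n, temp + 128, r);    // write back only the valid pixels
}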
// Any 3 planes to 1 with yuvconstants #define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - uint8* dst_ptr, const struct YuvConstants* yuvconstants, \ - int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 4]); \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 4]); \ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -145,6 +149,12 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) #ifdef HAS_I422TOARGBROW_SSSE3 ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) #endif +#ifdef HAS_I422TOAR30ROW_SSSE3 +ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7) +#endif +#ifdef HAS_I422TOAR30ROW_AVX2 +ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15) +#endif #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) @@ -194,48 +204,57 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7) #endif #undef ANY31C -// 64 byte per row for future AVX2 // Any 3 planes of 16 bit to 1 with yuvconstants -// TODO(fbarchard): consider -#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ - void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(T temp[16 * 3]); \ - SIMD_ALIGNED(uint8 out[64]); \ - memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r * SBPP); \ - memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ +// TODO(fbarchard): consider sharing this code with ANY31C +#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ + void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \ + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \ + int width) { \ + SIMD_ALIGNED(T temp[16 * 3]); \ + SIMD_ALIGNED(uint8_t out[64]); \ + memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r * SBPP); \ + memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ } +#ifdef HAS_I210TOAR30ROW_SSSE3 +ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7) +#endif #ifdef HAS_I210TOARGBROW_SSSE3 -ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16, 2, 4, 7) +ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7) +#endif +#ifdef HAS_I210TOARGBROW_AVX2 +ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15) +#endif +#ifdef HAS_I210TOAR30ROW_AVX2 
+ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15) #endif #undef ANY31CT // Any 2 planes to 1. -#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \ - int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ - } \ - memcpy(temp, y_buf + n * SBPP, r * SBPP); \ - memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ - SS(r, UVSHIFT) * SBPP2); \ - ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ +#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ + int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ + } \ + memcpy(temp, y_buf + n * SBPP, r * SBPP); \ + memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ + SS(r, UVSHIFT) * SBPP2); \ + ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } // Merge functions. @@ -319,21 +338,21 @@ ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15) #undef ANY21 // Any 2 planes to 1 with yuvconstants -#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n * SBPP, r * SBPP); \ - memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ - SS(r, UVSHIFT) * SBPP2); \ - ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ +#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n * SBPP, r * SBPP); \ + memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ + SS(r, UVSHIFT) * SBPP2); \ + ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } // Biplanar to RGB. @@ -377,8 +396,8 @@ ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7) // Any 1 to 1. 
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 2]); \ memset(temp, 0, 128); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -413,9 +432,15 @@ ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) #endif +#if defined(HAS_ABGRTOAR30ROW_SSSE3) +ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3) +#endif #if defined(HAS_ARGBTOAR30ROW_SSSE3) ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3) #endif +#if defined(HAS_ABGRTOAR30ROW_AVX2) +ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7) +#endif #if defined(HAS_ARGBTOAR30ROW_AVX2) ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7) #endif @@ -632,8 +657,8 @@ ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15) // Any 1 to 1 blended. Destination is read, modify, write. #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 2]); \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 2]); \ memset(temp, 0, 64 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -661,24 +686,24 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) #undef ANY11B // Any 1 to 1 with parameter. -#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, T param, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, param, n); \ - } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - ANY_SIMD(temp, temp + 64, param, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ +#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 2]); \ + memset(temp, 0, 64); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, param, n); \ + } \ + memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ + ANY_SIMD(temp, temp + 64, param, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ } #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, - const uint32, + const uint32_t, 4, 2, 3) @@ -686,7 +711,7 @@ ANY11P(ARGBToRGB565DitherRow_Any_SSE2, #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, - const uint32, + const uint32_t, 4, 2, 7) @@ -694,7 +719,7 @@ ANY11P(ARGBToRGB565DitherRow_Any_AVX2, #if defined(HAS_ARGBTORGB565DITHERROW_NEON) ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON, - const uint32, + const uint32_t, 4, 2, 7) @@ -702,22 +727,22 @@ ANY11P(ARGBToRGB565DitherRow_Any_NEON, #if defined(HAS_ARGBTORGB565DITHERROW_MSA) ANY11P(ARGBToRGB565DitherRow_Any_MSA, ARGBToRGB565DitherRow_MSA, - const uint32, + const uint32_t, 4, 2, 7) #endif #ifdef HAS_ARGBSHUFFLEROW_SSSE3 -ANY11P(ARGBShuffleRow_Any_SSSE3, 
ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7) +ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) #endif #ifdef HAS_ARGBSHUFFLEROW_AVX2 -ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15) +ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15) #endif #ifdef HAS_ARGBSHUFFLEROW_NEON -ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3) +ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) #endif #ifdef HAS_ARGBSHUFFLEROW_MSA -ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8*, 4, 4, 7) +ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) #endif #undef ANY11P @@ -742,34 +767,53 @@ ANY11C(Convert16To8Row_Any_SSSE3, Convert16To8Row_SSSE3, 2, 1, - uint16, - uint8, + uint16_t, + uint8_t, 15) #endif #ifdef HAS_CONVERT16TO8ROW_AVX2 -ANY11C(Convert16To8Row_Any_AVX2, Convert16To8Row_AVX2, 2, 1, uint16, uint8, 31) +ANY11C(Convert16To8Row_Any_AVX2, + Convert16To8Row_AVX2, + 2, + 1, + uint16_t, + uint8_t, + 31) #endif #ifdef HAS_CONVERT8TO16ROW_SSE2 -ANY11C(Convert8To16Row_Any_SSE2, Convert8To16Row_SSE2, 1, 2, uint8, uint16, 15) +ANY11C(Convert8To16Row_Any_SSE2, + Convert8To16Row_SSE2, + 1, + 2, + uint8_t, + uint16_t, + 15) #endif #ifdef HAS_CONVERT8TO16ROW_AVX2 -ANY11C(Convert8To16Row_Any_AVX2, Convert8To16Row_AVX2, 1, 2, uint8, uint16, 31) +ANY11C(Convert8To16Row_Any_AVX2, + Convert8To16Row_AVX2, + 1, + 2, + uint8_t, + uint16_t, + 31) #endif #undef ANY11C // Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts. -#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ - void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, T param, int width) { \ - SIMD_ALIGNED(uint16 temp[32 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, param, n); \ - } \ - memcpy(temp, src_ptr + n, r * SBPP); \ - ANY_SIMD(temp, temp + 16, param, MASK + 1); \ - memcpy(dst_ptr + n, temp + 16, r * BPP); \ +#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ + void NAMEANY(const uint16_t* src_ptr, uint16_t* dst_ptr, T param, \ + int width) { \ + SIMD_ALIGNED(uint16_t temp[32 * 2]); \ + memset(temp, 0, 64); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, param, n); \ + } \ + memcpy(temp, src_ptr + n, r * SBPP); \ + ANY_SIMD(temp, temp + 16, param, MASK + 1); \ + memcpy(dst_ptr + n, temp + 16, r * BPP); \ } #ifdef HAS_HALFFLOATROW_SSE2 @@ -793,9 +837,9 @@ ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, float, 2, 2, 31) // Any 1 to 1 with yuvconstants #define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \ const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ + SIMD_ALIGNED(uint8_t temp[128 * 2]); \ memset(temp, 0, 128); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -825,20 +869,20 @@ ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7) #undef ANY11C // Any 1 to 1 interpolate. Takes 2 rows of source via stride. 
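The Convert16To8 and Convert8To16 wrappers registered above carry a fixed-point scale as their row parameter. In the 16-to-8 direction the C reference is essentially (v * scale) >> 16 followed by a clamp, so the caller chooses the scale for the source depth; a scale of 16384, for instance, maps 10-bit 1023 onto 255 since (1023 * 16384) >> 16 = 255. A scalar sketch under that assumption (the exact rounding is upstream's choice):

#include <stdint.h>

static int clamp255(int v) {
  return v > 255 ? 255 : v;
}

// 16.16 fixed-point downscale: scale = 16384 rescales 10-bit data to 8 bits.
void Convert16To8Row(const uint16_t* src, uint8_t* dst, int scale, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = (uint8_t)clamp255((src[x] * scale) >> 16);
  }
}

The interpolate wrapper that follows extends the same remainder pattern to kernels reading two source rows.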
-#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ - void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, \ - int width, int source_y_fraction) { \ - SIMD_ALIGNED(uint8 temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ - } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \ - ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ +#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ + void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \ + ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \ + SIMD_ALIGNED(uint8_t temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ + } \ + memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ + memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \ + ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } #ifdef HAS_INTERPOLATEROW_AVX2 @@ -857,8 +901,8 @@ ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) // Any 1 to 1 mirror. #define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 2]); \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 2]); \ memset(temp, 0, 64); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -897,46 +941,47 @@ ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) #undef ANY11M // Any 1 plane. (memset) -#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ - void NAMEANY(uint8* dst_ptr, T v32, int width) { \ - SIMD_ALIGNED(uint8 temp[64]); \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(dst_ptr, v32, n); \ - } \ - ANY_SIMD(temp, v32, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp, r * BPP); \ +#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ + void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \ + SIMD_ALIGNED(uint8_t temp[64]); \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(dst_ptr, v32, n); \ + } \ + ANY_SIMD(temp, v32, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp, r * BPP); \ } #ifdef HAS_SETROW_X86 -ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3) +ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3) #endif #ifdef HAS_SETROW_NEON -ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15) +ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15) #endif #ifdef HAS_ARGBSETROW_NEON -ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3) +ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3) #endif #ifdef HAS_ARGBSETROW_MSA -ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32, 4, 3) +ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3) #endif #undef ANY1 // Any 1 to 2. Outputs UV planes. 
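The interpolators wrapped by ANY11T blend two rows with an 8-bit fraction: 0 keeps the first row, 128 averages the two, and larger values weight the second row. A plausible scalar form of the blend the SIMD kernels implement (whether the real kernels round with +128 varies by version; treat the rounding here as an assumption):

#include <stddef.h>
#include <stdint.h>

void InterpolateRow(uint8_t* dst, const uint8_t* src,
                    ptrdiff_t src_stride, int width, int source_y_fraction) {
  const uint8_t* src1 = src + src_stride;  /* second source row */
  int f1 = source_y_fraction;              /* weight of the second row */
  int f0 = 256 - f1;                       /* weight of the first row */
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src[x] * f0 + src1[x] * f1 + 128) >> 8);
  }
}

ANY12 below applies the remainder pattern to kernels that write two output planes.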
-#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 3]); \ - memset(temp, 0, 128); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_u, dst_v, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ - memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ - memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ +#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ + int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 3]); \ + memset(temp, 0, 128); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_u, dst_v, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ + ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ + memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ + memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ } #ifdef HAS_SPLITUVROW_SSE2 @@ -975,21 +1020,21 @@ ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) #undef ANY12 // Any 1 to 3. Outputs RGB planes. -#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_r, uint8* dst_g, uint8* dst_b, \ - int width) { \ - SIMD_ALIGNED(uint8 temp[16 * 6]); \ - memset(temp, 0, 16 * 3); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ - } \ - memcpy(temp, src_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \ - memcpy(dst_r + n, temp + 16 * 3, r); \ - memcpy(dst_g + n, temp + 16 * 4, r); \ - memcpy(dst_b + n, temp + 16 * 5, r); \ +#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ + uint8_t* dst_b, int width) { \ + SIMD_ALIGNED(uint8_t temp[16 * 6]); \ + memset(temp, 0, 16 * 3); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ + } \ + memcpy(temp, src_ptr + n * BPP, r * BPP); \ + ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \ + memcpy(dst_r + n, temp + 16 * 3, r); \ + memcpy(dst_g + n, temp + 16 * 4, r); \ + memcpy(dst_b + n, temp + 16 * 5, r); \ } #ifdef HAS_SPLITRGBROW_SSSE3 @@ -1002,9 +1047,9 @@ ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15) // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. // 128 byte row allows for 32 avx ARGB pixels. 
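SplitUVRow, the canonical ANY12 client registered above, de-interleaves a packed UV plane (as in NV12) into separate U and V planes; the scalar operation the SIMD kernels vectorize is just:

#include <stdint.h>

void SplitUVRow(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_u[x] = src_uv[0];  /* even bytes -> U plane */
    dst_v[x] = src_uv[1];  /* odd bytes  -> V plane */
    src_uv += 2;
  }
}

ANY12S, defined next, adds a source stride so subsampling kernels can average two rows.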
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, int src_stride_ptr, uint8* dst_u, \ - uint8* dst_v, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 4]); \ + void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \ + uint8_t* dst_v, int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 4]); \ memset(temp, 0, 128 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ diff --git a/chromium/third_party/libyuv/source/row_common.cc b/chromium/third_party/libyuv/source/row_common.cc index a0ca90b8ab8..297d87e01db 100644 --- a/chromium/third_party/libyuv/source/row_common.cc +++ b/chromium/third_party/libyuv/source/row_common.cc @@ -10,6 +10,7 @@ #include "libyuv/row.h" +#include <stdio.h> #include <string.h> // For memcpy and memset. #include "libyuv/basic_types.h" @@ -23,59 +24,69 @@ extern "C" { #define USE_BRANCHLESS 1 #if USE_BRANCHLESS -static __inline int32 clamp0(int32 v) { +static __inline int32_t clamp0(int32_t v) { return ((-(v) >> 31) & (v)); } -static __inline int32 clamp255(int32 v) { +static __inline int32_t clamp255(int32_t v) { return (((255 - (v)) >> 31) | (v)) & 255; } -static __inline uint32 Clamp(int32 val) { - int v = clamp0(val); - return (uint32)(clamp255(v)); +static __inline int32_t clamp1023(int32_t v) { + return (((1023 - (v)) >> 31) | (v)) & 1023; } -static __inline uint32 Abs(int32 v) { +static __inline uint32_t Abs(int32_t v) { int m = v >> 31; return (v + m) ^ m; } #else // USE_BRANCHLESS -static __inline int32 clamp0(int32 v) { +static __inline int32_t clamp0(int32_t v) { return (v < 0) ? 0 : v; } -static __inline int32 clamp255(int32 v) { +static __inline int32_t clamp255(int32_t v) { return (v > 255) ? 255 : v; } -static __inline uint32 Clamp(int32 val) { - int v = clamp0(val); - return (uint32)(clamp255(v)); +static __inline int32_t clamp1023(int32_t v) { + return (v > 1023) ? 1023 : v; } -static __inline uint32 Abs(int32 v) { +static __inline uint32_t Abs(int32_t v) { return (v < 0) ? 
-v : v; } #endif // USE_BRANCHLESS +static __inline uint32_t Clamp(int32_t val) { + int v = clamp0(val); + return (uint32_t)(clamp255(v)); +} + +static __inline uint32_t Clamp10(int32_t val) { + int v = clamp0(val); + return (uint32_t)(clamp1023(v)); +} -#ifdef LIBYUV_LITTLE_ENDIAN -#define WRITEWORD(p, v) *(uint32*)(p) = v +// Little Endian +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ + defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define WRITEWORD(p, v) *(uint32_t*)(p) = v #else -static inline void WRITEWORD(uint8* p, uint32 v) { - p[0] = (uint8)(v & 255); - p[1] = (uint8)((v >> 8) & 255); - p[2] = (uint8)((v >> 16) & 255); - p[3] = (uint8)((v >> 24) & 255); +static inline void WRITEWORD(uint8_t* p, uint32_t v) { + p[0] = (uint8_t)(v & 255); + p[1] = (uint8_t)((v >> 8) & 255); + p[2] = (uint8_t)((v >> 16) & 255); + p[3] = (uint8_t)((v >> 24) & 255); } #endif -void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { +void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_rgb24[0]; - uint8 g = src_rgb24[1]; - uint8 r = src_rgb24[2]; + uint8_t b = src_rgb24[0]; + uint8_t g = src_rgb24[1]; + uint8_t r = src_rgb24[2]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; @@ -85,12 +96,12 @@ void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { } } -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { +void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 r = src_raw[0]; - uint8 g = src_raw[1]; - uint8 b = src_raw[2]; + uint8_t r = src_raw[0]; + uint8_t g = src_raw[1]; + uint8_t b = src_raw[2]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; @@ -100,12 +111,12 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { } } -void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) { +void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { int x; for (x = 0; x < width; ++x) { - uint8 r = src_raw[0]; - uint8 g = src_raw[1]; - uint8 b = src_raw[2]; + uint8_t r = src_raw[0]; + uint8_t g = src_raw[1]; + uint8_t b = src_raw[2]; dst_rgb24[0] = b; dst_rgb24[1] = g; dst_rgb24[2] = r; @@ -114,12 +125,14 @@ void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) { } } -void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { +void RGB565ToARGBRow_C(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_rgb565[0] & 0x1f; - uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r = src_rgb565[1] >> 3; + uint8_t b = src_rgb565[0] & 0x1f; + uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r = src_rgb565[1] >> 3; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 2) | (g >> 4); dst_argb[2] = (r << 3) | (r >> 2); @@ -129,15 +142,15 @@ void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { } } -void ARGB1555ToARGBRow_C(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb1555[0] & 0x1f; - uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r = (src_argb1555[1] & 0x7c) >> 2; - uint8 a = src_argb1555[1] >> 7; + uint8_t b = 
src_argb1555[0] & 0x1f; + uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r = (src_argb1555[1] & 0x7c) >> 2; + uint8_t a = src_argb1555[1] >> 7; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 3) | (g >> 2); dst_argb[2] = (r << 3) | (r >> 2); @@ -147,15 +160,15 @@ void ARGB1555ToARGBRow_C(const uint8* src_argb1555, } } -void ARGB4444ToARGBRow_C(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb4444[0] & 0x0f; - uint8 g = src_argb4444[0] >> 4; - uint8 r = src_argb4444[1] & 0x0f; - uint8 a = src_argb4444[1] >> 4; + uint8_t b = src_argb4444[0] & 0x0f; + uint8_t g = src_argb4444[0] >> 4; + uint8_t r = src_argb4444[1] & 0x0f; + uint8_t a = src_argb4444[1] >> 4; dst_argb[0] = (b << 4) | b; dst_argb[1] = (g << 4) | g; dst_argb[2] = (r << 4) | r; @@ -165,14 +178,14 @@ void ARGB4444ToARGBRow_C(const uint8* src_argb4444, } } -void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width) { +void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint32 ar30 = *(uint32*)src_ar30; - uint32 b = ar30 & 0x3ff; - uint32 g = (ar30 >> 10) & 0x3ff; - uint32 r = (ar30 >> 20) & 0x3ff; - uint32 a = (ar30 >> 30) & 0x3; + uint32_t ar30 = *(uint32_t*)src_ar30; + uint32_t b = ar30 & 0x3ff; + uint32_t g = (ar30 >> 10) & 0x3ff; + uint32_t r = (ar30 >> 20) & 0x3ff; + uint32_t a = (ar30 >> 30) & 0x3; dst_argb[0] = b >> 2; dst_argb[1] = g >> 2; dst_argb[2] = r >> 2; @@ -182,12 +195,29 @@ void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width) { } } -void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb[0]; - uint8 g = src_argb[1]; - uint8 r = src_argb[2]; + uint32_t ar30 = *(uint32_t*)src_ar30; + uint32_t b = ar30 & 0x3ff; + uint32_t g = (ar30 >> 10) & 0x3ff; + uint32_t r = (ar30 >> 20) & 0x3ff; + uint32_t a = (ar30 >> 30) & 0x3; + dst_abgr[0] = r >> 2; + dst_abgr[1] = g >> 2; + dst_abgr[2] = b >> 2; + dst_abgr[3] = a * 0x55; + dst_abgr += 4; + src_ar30 += 4; + } +} + +void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; dst_rgb[0] = b; dst_rgb[1] = g; dst_rgb[2] = r; @@ -196,12 +226,12 @@ void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb[0]; - uint8 g = src_argb[1]; - uint8 r = src_argb[2]; + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; dst_rgb[0] = r; dst_rgb[1] = g; dst_rgb[2] = b; @@ -210,25 +240,25 @@ void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 2; - uint8 r0 = src_argb[2] >> 3; - uint8 b1 = src_argb[4] >> 3; - uint8 g1 = src_argb[5] >> 2; - uint8 r1 = src_argb[6] >> 3; + uint8_t b0 = src_argb[0] >> 3; + 
uint8_t g0 = src_argb[1] >> 2; + uint8_t r0 = src_argb[2] >> 3; + uint8_t b1 = src_argb[4] >> 3; + uint8_t g1 = src_argb[5] >> 2; + uint8_t r1 = src_argb[6] >> 3; WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27)); dst_rgb += 4; src_argb += 8; } if (width & 1) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 2; - uint8 r0 = src_argb[2] >> 3; - *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + uint8_t b0 = src_argb[0] >> 3; + uint8_t g0 = src_argb[1] >> 2; + uint8_t r0 = src_argb[2] >> 3; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); } } @@ -240,20 +270,20 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { // endian will not affect order of the original matrix. But the dither4 // will containing the first pixel in the lower byte for little endian // or the upper byte for big endian. -void ARGBToRGB565DitherRow_C(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width) { int x; for (x = 0; x < width - 1; x += 2) { int dither0 = ((const unsigned char*)(&dither4))[x & 3]; int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; - uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; - uint8 b1 = clamp255(src_argb[4] + dither1) >> 3; - uint8 g1 = clamp255(src_argb[5] + dither1) >> 2; - uint8 r1 = clamp255(src_argb[6] + dither1) >> 3; + uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; + uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; + uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; + uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3; + uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2; + uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3; WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27)); dst_rgb += 4; @@ -261,125 +291,138 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb, } if (width & 1) { int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; - uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; - *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; + uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; + uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); } } -void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 3; - uint8 r0 = src_argb[2] >> 3; - uint8 a0 = src_argb[3] >> 7; - uint8 b1 = src_argb[4] >> 3; - uint8 g1 = src_argb[5] >> 3; - uint8 r1 = src_argb[6] >> 3; - uint8 a1 = src_argb[7] >> 7; - *(uint32*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | - (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); + uint8_t b0 = src_argb[0] >> 3; + uint8_t g0 = src_argb[1] >> 3; + uint8_t r0 = src_argb[2] >> 3; + uint8_t a0 = src_argb[3] >> 7; + uint8_t b1 = src_argb[4] >> 3; + uint8_t g1 = src_argb[5] >> 3; + uint8_t r1 = src_argb[6] >> 3; + uint8_t a1 = src_argb[7] >> 7; + *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | + (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); dst_rgb += 4; src_argb += 8; } 
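  // Odd width: the loop above packs pixels in pairs with one 32-bit store,
  // so a final lone pixel is converted below and stored as 16 bits.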
if (width & 1) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 3; - uint8 r0 = src_argb[2] >> 3; - uint8 a0 = src_argb[3] >> 7; - *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); + uint8_t b0 = src_argb[0] >> 3; + uint8_t g0 = src_argb[1] >> 3; + uint8_t r0 = src_argb[2] >> 3; + uint8_t a0 = src_argb[3] >> 7; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); } } -void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 4; - uint8 g0 = src_argb[1] >> 4; - uint8 r0 = src_argb[2] >> 4; - uint8 a0 = src_argb[3] >> 4; - uint8 b1 = src_argb[4] >> 4; - uint8 g1 = src_argb[5] >> 4; - uint8 r1 = src_argb[6] >> 4; - uint8 a1 = src_argb[7] >> 4; - *(uint32*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | (b1 << 16) | - (g1 << 20) | (r1 << 24) | (a1 << 28); + uint8_t b0 = src_argb[0] >> 4; + uint8_t g0 = src_argb[1] >> 4; + uint8_t r0 = src_argb[2] >> 4; + uint8_t a0 = src_argb[3] >> 4; + uint8_t b1 = src_argb[4] >> 4; + uint8_t g1 = src_argb[5] >> 4; + uint8_t r1 = src_argb[6] >> 4; + uint8_t a1 = src_argb[7] >> 4; + *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | + (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); dst_rgb += 4; src_argb += 8; } if (width & 1) { - uint8 b0 = src_argb[0] >> 4; - uint8 g0 = src_argb[1] >> 4; - uint8 r0 = src_argb[2] >> 4; - uint8 a0 = src_argb[3] >> 4; - *(uint16*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); + uint8_t b0 = src_argb[0] >> 4; + uint8_t g0 = src_argb[1] >> 4; + uint8_t r0 = src_argb[2] >> 4; + uint8_t a0 = src_argb[3] >> 4; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); } } -void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) { int x; for (x = 0; x < width; ++x) { - uint32 b0 = (src_argb[0] >> 6) | ((uint32)(src_argb[0]) << 2); - uint32 g0 = (src_argb[1] >> 6) | ((uint32)(src_argb[1]) << 2); - uint32 r0 = (src_argb[2] >> 6) | ((uint32)(src_argb[2]) << 2); - uint32 a0 = (src_argb[3] >> 6); - *(uint32*)(dst_rgb) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); - dst_rgb += 4; + uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2); + uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2); + uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); + uint32_t a0 = (src_abgr[3] >> 6); + *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30); + dst_ar30 += 4; + src_abgr += 4; + } +} + +void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { + int x; + for (x = 0; x < width; ++x) { + uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2); + uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2); + uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2); + uint32_t a0 = (src_argb[3] >> 6); + *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); + dst_ar30 += 4; src_argb += 4; } } -static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; } -static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; } -static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { +static __inline 
int RGBToV(uint8_t r, uint8_t g, uint8_t b) { return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; } // ARGBToY_C and ARGBToUV_C -#define MAKEROWY(NAME, R, G, B, BPP) \ - void NAME##ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ - int x; \ - for (x = 0; x < width; ++x) { \ - dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += BPP; \ - dst_y += 1; \ - } \ - } \ - void NAME##ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ - uint8* dst_u, uint8* dst_v, int width) { \ - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ - int x; \ - for (x = 0; x < width - 1; x += 2) { \ - uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \ - src_rgb1[B + BPP]) >> \ - 2; \ - uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \ - src_rgb1[G + BPP]) >> \ - 2; \ - uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \ - src_rgb1[R + BPP]) >> \ - 2; \ - dst_u[0] = RGBToU(ar, ag, ab); \ - dst_v[0] = RGBToV(ar, ag, ab); \ - src_rgb0 += BPP * 2; \ - src_rgb1 += BPP * 2; \ - dst_u += 1; \ - dst_v += 1; \ - } \ - if (width & 1) { \ - uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ - uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ - uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ - dst_u[0] = RGBToU(ar, ag, ab); \ - dst_v[0] = RGBToV(ar, ag, ab); \ - } \ +#define MAKEROWY(NAME, R, G, B, BPP) \ + void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \ + int x; \ + for (x = 0; x < width; ++x) { \ + dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ + src_argb0 += BPP; \ + dst_y += 1; \ + } \ + } \ + void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \ + uint8_t* dst_u, uint8_t* dst_v, int width) { \ + const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \ + int x; \ + for (x = 0; x < width - 1; x += 2) { \ + uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \ + src_rgb1[B + BPP]) >> \ + 2; \ + uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \ + src_rgb1[G + BPP]) >> \ + 2; \ + uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \ + src_rgb1[R + BPP]) >> \ + 2; \ + dst_u[0] = RGBToU(ar, ag, ab); \ + dst_v[0] = RGBToV(ar, ag, ab); \ + src_rgb0 += BPP * 2; \ + src_rgb1 += BPP * 2; \ + dst_u += 1; \ + dst_v += 1; \ + } \ + if (width & 1) { \ + uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ + uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ + uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ + dst_u[0] = RGBToU(ar, ag, ab); \ + dst_v[0] = RGBToV(ar, ag, ab); \ + } \ } MAKEROWY(ARGB, 2, 1, 0, 4) @@ -415,65 +458,65 @@ MAKEROWY(RAW, 0, 1, 2, 3) // g -0.41869 * 255 = -106.76595 = -107 // r 0.50000 * 255 = 127.5 = 127 -static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (38 * r + 75 * g + 15 * b + 64) >> 7; } -static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; } -static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; } #define AVGB(a, b) (((a) + (b) + 1) >> 1) // ARGBToYJ_C and ARGBToUVJ_C -#define MAKEROWYJ(NAME, R, G, B, BPP) \ - void NAME##ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ - int x; \ - for (x = 0; x < width; ++x) { \ - dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += BPP; \ - dst_y += 1; \ - } \ - } \ - void 
NAME##ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ - uint8* dst_u, uint8* dst_v, int width) { \ - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ - int x; \ - for (x = 0; x < width - 1; x += 2) { \ - uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ - AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ - uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ - AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ - uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ - AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ - dst_u[0] = RGBToUJ(ar, ag, ab); \ - dst_v[0] = RGBToVJ(ar, ag, ab); \ - src_rgb0 += BPP * 2; \ - src_rgb1 += BPP * 2; \ - dst_u += 1; \ - dst_v += 1; \ - } \ - if (width & 1) { \ - uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \ - uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \ - uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \ - dst_u[0] = RGBToUJ(ar, ag, ab); \ - dst_v[0] = RGBToVJ(ar, ag, ab); \ - } \ +#define MAKEROWYJ(NAME, R, G, B, BPP) \ + void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \ + int x; \ + for (x = 0; x < width; ++x) { \ + dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ + src_argb0 += BPP; \ + dst_y += 1; \ + } \ + } \ + void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \ + uint8_t* dst_u, uint8_t* dst_v, int width) { \ + const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \ + int x; \ + for (x = 0; x < width - 1; x += 2) { \ + uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ + AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ + uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ + AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ + uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ + AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ + dst_u[0] = RGBToUJ(ar, ag, ab); \ + dst_v[0] = RGBToVJ(ar, ag, ab); \ + src_rgb0 += BPP * 2; \ + src_rgb1 += BPP * 2; \ + dst_u += 1; \ + dst_v += 1; \ + } \ + if (width & 1) { \ + uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \ + uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \ + uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \ + dst_u[0] = RGBToUJ(ar, ag, ab); \ + dst_v[0] = RGBToVJ(ar, ag, ab); \ + } \ } MAKEROWYJ(ARGB, 2, 1, 0, 4) #undef MAKEROWYJ -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { +void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_rgb565[0] & 0x1f; - uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r = src_rgb565[1] >> 3; + uint8_t b = src_rgb565[0] & 0x1f; + uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r = src_rgb565[1] >> 3; b = (b << 3) | (b >> 2); g = (g << 2) | (g >> 4); r = (r << 3) | (r >> 2); @@ -483,12 +526,12 @@ void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { } } -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { +void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb1555[0] & 0x1f; - uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r = (src_argb1555[1] & 0x7c) >> 2; + uint8_t b = src_argb1555[0] & 0x1f; + uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r = (src_argb1555[1] & 0x7c) >> 2; b = (b << 3) | (b >> 2); g = (g << 3) | (g >> 2); r = (r << 3) | (r >> 2); @@ -498,12 +541,12 @@ void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { } } -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* 
dst_y, int width) { +void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb4444[0] & 0x0f; - uint8 g = src_argb4444[0] >> 4; - uint8 r = src_argb4444[1] & 0x0f; + uint8_t b = src_argb4444[0] & 0x0f; + uint8_t g = src_argb4444[0] >> 4; + uint8_t r = src_argb4444[1] & 0x0f; b = (b << 4) | b; g = (g << 4) | g; r = (r << 4) | r; @@ -513,29 +556,29 @@ void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { } } -void RGB565ToUVRow_C(const uint8* src_rgb565, +void RGB565ToUVRow_C(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565; + const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565; int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_rgb565[0] & 0x1f; - uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r0 = src_rgb565[1] >> 3; - uint8 b1 = src_rgb565[2] & 0x1f; - uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); - uint8 r1 = src_rgb565[3] >> 3; - uint8 b2 = next_rgb565[0] & 0x1f; - uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8 r2 = next_rgb565[1] >> 3; - uint8 b3 = next_rgb565[2] & 0x1f; - uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); - uint8 r3 = next_rgb565[3] >> 3; - uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); + uint8_t b0 = src_rgb565[0] & 0x1f; + uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r0 = src_rgb565[1] >> 3; + uint8_t b1 = src_rgb565[2] & 0x1f; + uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); + uint8_t r1 = src_rgb565[3] >> 3; + uint8_t b2 = next_rgb565[0] & 0x1f; + uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); + uint8_t r2 = next_rgb565[1] >> 3; + uint8_t b3 = next_rgb565[2] & 0x1f; + uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); + uint8_t r3 = next_rgb565[3] >> 3; + uint8_t b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. + uint8_t g = (g0 + g1 + g2 + g3); + uint8_t r = (r0 + r1 + r2 + r3); b = (b << 1) | (b >> 6); // 787 -> 888. r = (r << 1) | (r >> 6); dst_u[0] = RGBToU(r, g, b); @@ -546,15 +589,15 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, dst_v += 1; } if (width & 1) { - uint8 b0 = src_rgb565[0] & 0x1f; - uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r0 = src_rgb565[1] >> 3; - uint8 b2 = next_rgb565[0] & 0x1f; - uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8 r2 = next_rgb565[1] >> 3; - uint8 b = (b0 + b2); // 565 * 2 = 676. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); + uint8_t b0 = src_rgb565[0] & 0x1f; + uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r0 = src_rgb565[1] >> 3; + uint8_t b2 = next_rgb565[0] & 0x1f; + uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); + uint8_t r2 = next_rgb565[1] >> 3; + uint8_t b = (b0 + b2); // 565 * 2 = 676. 
+ uint8_t g = (g0 + g2); + uint8_t r = (r0 + r2); b = (b << 2) | (b >> 4); // 676 -> 888 g = (g << 1) | (g >> 6); r = (r << 2) | (r >> 4); @@ -563,29 +606,29 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, } } -void ARGB1555ToUVRow_C(const uint8* src_argb1555, +void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555; + const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555; int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb1555[0] & 0x1f; - uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8 b1 = src_argb1555[2] & 0x1f; - uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); - uint8 r1 = (src_argb1555[3] & 0x7c) >> 2; - uint8 b2 = next_argb1555[0] & 0x1f; - uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8 r2 = (next_argb1555[1] & 0x7c) >> 2; - uint8 b3 = next_argb1555[2] & 0x1f; - uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); - uint8 r3 = (next_argb1555[3] & 0x7c) >> 2; - uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); + uint8_t b0 = src_argb1555[0] & 0x1f; + uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; + uint8_t b1 = src_argb1555[2] & 0x1f; + uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); + uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2; + uint8_t b2 = next_argb1555[0] & 0x1f; + uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); + uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; + uint8_t b3 = next_argb1555[2] & 0x1f; + uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); + uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2; + uint8_t b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. + uint8_t g = (g0 + g1 + g2 + g3); + uint8_t r = (r0 + r1 + r2 + r3); b = (b << 1) | (b >> 6); // 777 -> 888. g = (g << 1) | (g >> 6); r = (r << 1) | (r >> 6); @@ -597,15 +640,15 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, dst_v += 1; } if (width & 1) { - uint8 b0 = src_argb1555[0] & 0x1f; - uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8 b2 = next_argb1555[0] & 0x1f; - uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8 r2 = next_argb1555[1] >> 3; - uint8 b = (b0 + b2); // 555 * 2 = 666. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); + uint8_t b0 = src_argb1555[0] & 0x1f; + uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; + uint8_t b2 = next_argb1555[0] & 0x1f; + uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); + uint8_t r2 = next_argb1555[1] >> 3; + uint8_t b = (b0 + b2); // 555 * 2 = 666. + uint8_t g = (g0 + g2); + uint8_t r = (r0 + r2); b = (b << 2) | (b >> 4); // 666 -> 888. 
g = (g << 2) | (g >> 4); r = (r << 2) | (r >> 4); @@ -614,29 +657,29 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, } } -void ARGB4444ToUVRow_C(const uint8* src_argb4444, +void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, int src_stride_argb4444, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444; + const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444; int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb4444[0] & 0x0f; - uint8 g0 = src_argb4444[0] >> 4; - uint8 r0 = src_argb4444[1] & 0x0f; - uint8 b1 = src_argb4444[2] & 0x0f; - uint8 g1 = src_argb4444[2] >> 4; - uint8 r1 = src_argb4444[3] & 0x0f; - uint8 b2 = next_argb4444[0] & 0x0f; - uint8 g2 = next_argb4444[0] >> 4; - uint8 r2 = next_argb4444[1] & 0x0f; - uint8 b3 = next_argb4444[2] & 0x0f; - uint8 g3 = next_argb4444[2] >> 4; - uint8 r3 = next_argb4444[3] & 0x0f; - uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); + uint8_t b0 = src_argb4444[0] & 0x0f; + uint8_t g0 = src_argb4444[0] >> 4; + uint8_t r0 = src_argb4444[1] & 0x0f; + uint8_t b1 = src_argb4444[2] & 0x0f; + uint8_t g1 = src_argb4444[2] >> 4; + uint8_t r1 = src_argb4444[3] & 0x0f; + uint8_t b2 = next_argb4444[0] & 0x0f; + uint8_t g2 = next_argb4444[0] >> 4; + uint8_t r2 = next_argb4444[1] & 0x0f; + uint8_t b3 = next_argb4444[2] & 0x0f; + uint8_t g3 = next_argb4444[2] >> 4; + uint8_t r3 = next_argb4444[3] & 0x0f; + uint8_t b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. + uint8_t g = (g0 + g1 + g2 + g3); + uint8_t r = (r0 + r1 + r2 + r3); b = (b << 2) | (b >> 4); // 666 -> 888. g = (g << 2) | (g >> 4); r = (r << 2) | (r >> 4); @@ -648,15 +691,15 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444, dst_v += 1; } if (width & 1) { - uint8 b0 = src_argb4444[0] & 0x0f; - uint8 g0 = src_argb4444[0] >> 4; - uint8 r0 = src_argb4444[1] & 0x0f; - uint8 b2 = next_argb4444[0] & 0x0f; - uint8 g2 = next_argb4444[0] >> 4; - uint8 r2 = next_argb4444[1] & 0x0f; - uint8 b = (b0 + b2); // 444 * 2 = 555. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); + uint8_t b0 = src_argb4444[0] & 0x0f; + uint8_t g0 = src_argb4444[0] >> 4; + uint8_t r0 = src_argb4444[1] & 0x0f; + uint8_t b2 = next_argb4444[0] & 0x0f; + uint8_t g2 = next_argb4444[0] >> 4; + uint8_t r2 = next_argb4444[1] & 0x0f; + uint8_t b = (b0 + b2); // 444 * 2 = 555. + uint8_t g = (g0 + g2); + uint8_t r = (r0 + r2); b = (b << 3) | (b >> 2); // 555 -> 888. 
g = (g << 3) | (g >> 2); r = (r << 3) | (r >> 2); @@ -665,15 +708,15 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444, } } -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_C(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; for (x = 0; x < width; ++x) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; + uint8_t ab = src_argb[0]; + uint8_t ag = src_argb[1]; + uint8_t ar = src_argb[2]; dst_u[0] = RGBToU(ar, ag, ab); dst_v[0] = RGBToV(ar, ag, ab); src_argb += 4; @@ -682,10 +725,10 @@ void ARGBToUV444Row_C(const uint8* src_argb, } } -void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); + uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); dst_argb[2] = dst_argb[1] = dst_argb[0] = y; dst_argb[3] = src_argb[3]; dst_argb += 4; @@ -694,7 +737,7 @@ void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { } // Convert a row of image to Sepia tone. -void ARGBSepiaRow_C(uint8* dst_argb, int width) { +void ARGBSepiaRow_C(uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; @@ -713,9 +756,9 @@ void ARGBSepiaRow_C(uint8* dst_argb, int width) { // Apply color matrix to a row of image. Matrix is signed. // TODO(fbarchard): Consider adding rounding (+32). -void ARGBColorMatrixRow_C(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width) { int x; for (x = 0; x < width; ++x) { @@ -745,7 +788,9 @@ void ARGBColorMatrixRow_C(const uint8* src_argb, } // Apply color table to a row of image. -void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { +void ARGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; @@ -761,7 +806,9 @@ void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { } // Apply color table to a row of image. 
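The 565/555/444 widenings running through these rows all use bit replication: shift the channel up and OR its own high bits back in, which approximates v * 255 / (2^n - 1) with no divide and maps both endpoints exactly. Isolated for clarity:

#include <stdint.h>

/* 0 -> 0 and full scale -> 255 in every case. */
static uint8_t Expand5To8(uint8_t v) { return (uint8_t)((v << 3) | (v >> 2)); } /* 31 -> 255 */
static uint8_t Expand6To8(uint8_t v) { return (uint8_t)((v << 2) | (v >> 4)); } /* 63 -> 255 */
static uint8_t Expand4To8(uint8_t v) { return (uint8_t)((v << 4) | v); }        /* 15 -> 255 */

The UV averaging functions above defer the replication until after summing two or four samples, which is why their comments track intermediate widths like "565 * 4 = 787" before the final shift back to 888.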
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { +void RGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; @@ -774,7 +821,7 @@ void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { } } -void ARGBQuantizeRow_C(uint8* dst_argb, +void ARGBQuantizeRow_C(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, @@ -794,21 +841,21 @@ void ARGBQuantizeRow_C(uint8* dst_argb, #define REPEAT8(v) (v) | ((v) << 8) #define SHADE(f, v) v* f >> 24 -void ARGBShadeRow_C(const uint8* src_argb, - uint8* dst_argb, +void ARGBShadeRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value) { - const uint32 b_scale = REPEAT8(value & 0xff); - const uint32 g_scale = REPEAT8((value >> 8) & 0xff); - const uint32 r_scale = REPEAT8((value >> 16) & 0xff); - const uint32 a_scale = REPEAT8(value >> 24); + uint32_t value) { + const uint32_t b_scale = REPEAT8(value & 0xff); + const uint32_t g_scale = REPEAT8((value >> 8) & 0xff); + const uint32_t r_scale = REPEAT8((value >> 16) & 0xff); + const uint32_t a_scale = REPEAT8(value >> 24); int i; for (i = 0; i < width; ++i) { - const uint32 b = REPEAT8(src_argb[0]); - const uint32 g = REPEAT8(src_argb[1]); - const uint32 r = REPEAT8(src_argb[2]); - const uint32 a = REPEAT8(src_argb[3]); + const uint32_t b = REPEAT8(src_argb[0]); + const uint32_t g = REPEAT8(src_argb[1]); + const uint32_t r = REPEAT8(src_argb[2]); + const uint32_t a = REPEAT8(src_argb[3]); dst_argb[0] = SHADE(b, b_scale); dst_argb[1] = SHADE(g, g_scale); dst_argb[2] = SHADE(r, r_scale); @@ -823,20 +870,20 @@ void ARGBShadeRow_C(const uint8* src_argb, #define REPEAT8(v) (v) | ((v) << 8) #define SHADE(f, v) v* f >> 16 -void ARGBMultiplyRow_C(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBMultiplyRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { - const uint32 b = REPEAT8(src_argb0[0]); - const uint32 g = REPEAT8(src_argb0[1]); - const uint32 r = REPEAT8(src_argb0[2]); - const uint32 a = REPEAT8(src_argb0[3]); - const uint32 b_scale = src_argb1[0]; - const uint32 g_scale = src_argb1[1]; - const uint32 r_scale = src_argb1[2]; - const uint32 a_scale = src_argb1[3]; + const uint32_t b = REPEAT8(src_argb0[0]); + const uint32_t g = REPEAT8(src_argb0[1]); + const uint32_t r = REPEAT8(src_argb0[2]); + const uint32_t a = REPEAT8(src_argb0[3]); + const uint32_t b_scale = src_argb1[0]; + const uint32_t g_scale = src_argb1[1]; + const uint32_t r_scale = src_argb1[2]; + const uint32_t a_scale = src_argb1[3]; dst_argb[0] = SHADE(b, b_scale); dst_argb[1] = SHADE(g, g_scale); dst_argb[2] = SHADE(r, r_scale); @@ -851,9 +898,9 @@ void ARGBMultiplyRow_C(const uint8* src_argb0, #define SHADE(f, v) clamp255(v + f) -void ARGBAddRow_C(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBAddRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { @@ -878,9 +925,9 @@ void ARGBAddRow_C(const uint8* src_argb0, #define SHADE(f, v) clamp0(f - v) -void ARGBSubtractRow_C(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { @@ -904,10 +951,10 @@ void ARGBSubtractRow_C(const uint8* src_argb0, #undef 
SHADE // Sobel functions which mimics SSSE3. -void SobelXRow_C(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width) { int i; for (i = 0; i < width; ++i) { @@ -921,13 +968,13 @@ void SobelXRow_C(const uint8* src_y0, int b_diff = b - b_sub; int c_diff = c - c_sub; int sobel = Abs(a_diff + b_diff * 2 + c_diff); - dst_sobelx[i] = (uint8)(clamp255(sobel)); + dst_sobelx[i] = (uint8_t)(clamp255(sobel)); } } -void SobelYRow_C(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width) { int i; for (i = 0; i < width; ++i) { @@ -941,62 +988,62 @@ void SobelYRow_C(const uint8* src_y0, int b_diff = b - b_sub; int c_diff = c - c_sub; int sobel = Abs(a_diff + b_diff * 2 + c_diff); - dst_sobely[i] = (uint8)(clamp255(sobel)); + dst_sobely[i] = (uint8_t)(clamp255(sobel)); } } -void SobelRow_C(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int s = clamp255(r + b); - dst_argb[0] = (uint8)(s); - dst_argb[1] = (uint8)(s); - dst_argb[2] = (uint8)(s); - dst_argb[3] = (uint8)(255u); + dst_argb[0] = (uint8_t)(s); + dst_argb[1] = (uint8_t)(s); + dst_argb[2] = (uint8_t)(s); + dst_argb[3] = (uint8_t)(255u); dst_argb += 4; } } -void SobelToPlaneRow_C(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int s = clamp255(r + b); - dst_y[i] = (uint8)(s); + dst_y[i] = (uint8_t)(s); } } -void SobelXYRow_C(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int g = clamp255(r + b); - dst_argb[0] = (uint8)(b); - dst_argb[1] = (uint8)(g); - dst_argb[2] = (uint8)(r); - dst_argb[3] = (uint8)(255u); + dst_argb[0] = (uint8_t)(b); + dst_argb[1] = (uint8_t)(g); + dst_argb[2] = (uint8_t)(r); + dst_argb[3] = (uint8_t)(255u); dst_argb += 4; } } -void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { +void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { // Copy a Y to RGB. int x; for (x = 0; x < width; ++x) { - uint8 y = src_y[0]; + uint8_t y = src_y[0]; dst_argb[2] = dst_argb[1] = dst_argb[0] = y; dst_argb[3] = 255u; dst_argb += 4; @@ -1253,12 +1300,14 @@ const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { #undef YG // C reference code that mimics the YUV assembly. -static __inline void YuvPixel(uint8 y, - uint8 u, - uint8 v, - uint8* b, - uint8* g, - uint8* r, +// Reads 8 bit YUV and leaves result as 16 bit. 
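YuvPixel, defined next, evaluates the conversion matrix in x.6 fixed point using coefficient tables from YuvConstants. For orientation, here is the same arithmetic with approximate BT.601 studio-swing coefficients hard-coded and scaled by 64; these numbers are illustrative stand-ins, not the library's actual tables:

#include <stdint.h>

static int Clamp8(int v) {
  return v < 0 ? 0 : (v > 255 ? 255 : v);
}

/* One pixel, BT.601 studio swing, coefficients * 64 (x.6 fixed point):
 * R = 1.164(Y-16) + 1.596(V-128), etc. The +32 rounds before >> 6. */
static void YuvToRgbSketch(uint8_t y, uint8_t u, uint8_t v,
                           uint8_t* b, uint8_t* g, uint8_t* r) {
  int y1 = (y - 16) * 75;                                    /* 1.164 * 64 */
  *r = (uint8_t)Clamp8((y1 + 102 * (v - 128) + 32) >> 6);    /* 1.596 * 64 */
  *g = (uint8_t)Clamp8((y1 - 52 * (v - 128) - 25 * (u - 128) + 32) >> 6);
  *b = (uint8_t)Clamp8((y1 + 129 * (u - 128) + 32) >> 6);    /* 2.018 * 64 */
}

For example, Y=235, U=V=128 (studio white) clamps to (255, 255, 255), and Y=16 gives black, matching what the table-driven YuvPixel produces before its final >> 6.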
+ +static __inline void YuvPixel(uint8_t y, + uint8_t u, + uint8_t v, + uint8_t* b, + uint8_t* g, + uint8_t* r, const struct YuvConstants* yuvconstants) { #if defined(__aarch64__) int ub = -yuvconstants->kUVToRB[0]; @@ -1289,19 +1338,63 @@ static __inline void YuvPixel(uint8 y, int yg = yuvconstants->kYToRgb[0]; #endif - uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16; - *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6); - *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6); - *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6); + uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; + *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6); + *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6); + *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6); } -// C reference code that mimics the YUV 10 bit assembly. -static __inline void YuvPixel10(uint16 y, - uint16 u, - uint16 v, - uint8* b, - uint8* g, - uint8* r, +// Reads 8 bit YUV and leaves result as 16 bit. +static __inline void YuvPixel8_16(uint8_t y, + uint8_t u, + uint8_t v, + int* b, + int* g, + int* r, + const struct YuvConstants* yuvconstants) { +#if defined(__aarch64__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = -yuvconstants->kUVToRB[1]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#elif defined(__arm__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[4]; + int vr = -yuvconstants->kUVToRB[4]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#else + int ub = yuvconstants->kUVToB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = yuvconstants->kUVToR[1]; + int bb = yuvconstants->kUVBiasB[0]; + int bg = yuvconstants->kUVBiasG[0]; + int br = yuvconstants->kUVBiasR[0]; + int yg = yuvconstants->kYToRgb[0]; +#endif + + uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; + *b = (int)(-(u * ub) + y1 + bb); + *g = (int)(-(u * ug + v * vg) + y1 + bg); + *r = (int)(-(v * vr) + y1 + br); +} + +// C reference code that mimics the YUV 16 bit assembly. +// Reads 10 bit YUV and leaves result as 16 bit. +static __inline void YuvPixel16(int16_t y, + int16_t u, + int16_t v, + int* b, + int* g, + int* r, const struct YuvConstants* yuvconstants) { #if defined(__aarch64__) int ub = -yuvconstants->kUVToRB[0]; @@ -1332,12 +1425,30 @@ static __inline void YuvPixel10(uint16 y, int yg = yuvconstants->kYToRgb[0]; #endif - uint32 y1 = (uint32)((y << 6) * yg) >> 16; + uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16; u = clamp255(u >> 2); v = clamp255(v >> 2); - *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6); - *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6); - *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6); + *b = (int)(-(u * ub) + y1 + bb); + *g = (int)(-(u * ug + v * vg) + y1 + bg); + *r = (int)(-(v * vr) + y1 + br); +} + +// C reference code that mimics the YUV 10 bit assembly. +// Reads 10 bit YUV and clamps down to 8 bit RGB. +static __inline void YuvPixel10(uint16_t y, + uint16_t u, + uint16_t v, + uint8_t* b, + uint8_t* g, + uint8_t* r, + const struct YuvConstants* yuvconstants) { + int b16; + int g16; + int r16; + YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants); + *b = Clamp(b16 >> 6); + *g = Clamp(g16 >> 6); + *r = Clamp(r16 >> 6); } // Y contribution to R,G,B. 
Scale and bias. @@ -1345,11 +1456,11 @@ static __inline void YuvPixel10(uint16 y, #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ // C reference code that mimics the YUV assembly. -static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; - *b = Clamp((int32)(y1 + YGB) >> 6); - *g = Clamp((int32)(y1 + YGB) >> 6); - *r = Clamp((int32)(y1 + YGB) >> 6); +static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) { + uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16; + *b = Clamp((int32_t)(y1 + YGB) >> 6); + *g = Clamp((int32_t)(y1 + YGB) >> 6); + *r = Clamp((int32_t)(y1 + YGB) >> 6); } #undef YG @@ -1359,16 +1470,16 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON)) // C mimic assembly. // TODO(fbarchard): Remove subsampling from Neon. -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I444ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 u = (src_u[0] + src_u[1] + 1) >> 1; - uint8 v = (src_v[0] + src_v[1] + 1) >> 1; + uint8_t u = (src_u[0] + src_u[1] + 1) >> 1; + uint8_t v = (src_v[0] + src_v[1] + 1) >> 1; YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; @@ -1387,10 +1498,10 @@ void I444ToARGBRow_C(const uint8* src_y, } } #else -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I444ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1407,10 +1518,10 @@ void I444ToARGBRow_C(const uint8* src_y, #endif // Also used for 420 -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1434,10 +1545,10 @@ void I422ToARGBRow_C(const uint8* src_y, } // 10 bit YUV to ARGB -void I210ToARGBRow_C(const uint16* src_y, - const uint16* src_u, - const uint16* src_v, - uint8* rgb_buf, +void I210ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1460,11 +1571,78 @@ void I210ToARGBRow_C(const uint16* src_y, } } -void I422AlphaToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* rgb_buf, +static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) { + uint32_t ar30; + b = b >> 4; // convert 10.6 to 10 bit. 
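+    // The YuvPixel16 outputs carry 6 fractional bits on an 8-bit color scale
+    // (max about 255 * 64), so >> 4 lands them in the 10-bit 0..1023 range
+    // that Clamp10 below enforces.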
+ g = g >> 4; + r = r >> 4; + b = Clamp10(b); + g = Clamp10(g); + r = Clamp10(r); + ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000; + (*(uint32_t*)rgb_buf) = ar30; +} + +// 10 bit YUV to 10 bit AR30 +void I210ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int b; + int g; + int r; + for (x = 0; x < width - 1; x += 2) { + YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf + 4, b, g, r); + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + } +} + +// 8 bit YUV to 10 bit AR30 +// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits. +void I422ToAR30Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int b; + int g; + int r; + for (x = 0; x < width - 1; x += 2) { + YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf + 4, b, g, r); + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + } +} + +void I422AlphaToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1488,10 +1666,10 @@ void I422AlphaToARGBRow_C(const uint8* src_y, } } -void I422ToRGB24Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1511,18 +1689,18 @@ void I422ToRGB24Row_C(const uint8* src_y, } } -void I422ToARGB4444Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); @@ -1533,8 +1711,8 @@ void I422ToARGB4444Row_C(const uint8* src_y, b1 = b1 >> 4; g1 = g1 >> 4; r1 = r1 >> 4; - *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) | - (g1 << 20) | (r1 << 24) | 0xf000f000; + *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) | + (g1 << 20) | (r1 << 24) | 0xf000f000; src_y += 2; src_u += 1; src_v += 1; @@ -1545,22 +1723,22 @@ void I422ToARGB4444Row_C(const uint8* src_y, b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; - *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; + *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; } } -void I422ToARGB1555Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - 
uint8* dst_argb1555, +void I422ToARGB1555Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); @@ -1571,8 +1749,8 @@ void I422ToARGB1555Row_C(const uint8* src_y, b1 = b1 >> 3; g1 = g1 >> 3; r1 = r1 >> 3; - *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) | - (g1 << 21) | (r1 << 26) | 0x80008000; + *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) | + (g1 << 21) | (r1 << 26) | 0x80008000; src_y += 2; src_u += 1; src_v += 1; @@ -1583,22 +1761,22 @@ void I422ToARGB1555Row_C(const uint8* src_y, b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; - *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; + *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; } } -void I422ToRGB565Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); @@ -1609,7 +1787,7 @@ void I422ToRGB565Row_C(const uint8* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = + *(uint32_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); src_y += 2; src_u += 1; @@ -1621,13 +1799,13 @@ void I422ToRGB565Row_C(const uint8* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); } } -void NV12ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* rgb_buf, +void NV12ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1649,9 +1827,9 @@ void NV12ToARGBRow_C(const uint8* src_y, } } -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* rgb_buf, +void NV21ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1673,17 +1851,17 @@ void NV21ToARGBRow_C(const uint8* src_y, } } -void NV12ToRGB565Row_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); @@ -1694,7 +1872,7 @@ void NV12ToRGB565Row_C(const uint8* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = + *(uint32_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); src_y += 2; src_uv += 2; @@ -1705,12 +1883,12 @@ void NV12ToRGB565Row_C(const uint8* src_y, b0 = 
b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); } } -void YUY2ToARGBRow_C(const uint8* src_yuy2, - uint8* rgb_buf, +void YUY2ToARGBRow_C(const uint8_t* src_yuy2, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1731,8 +1909,8 @@ void YUY2ToARGBRow_C(const uint8* src_yuy2, } } -void UYVYToARGBRow_C(const uint8* src_uyvy, - uint8* rgb_buf, +void UYVYToARGBRow_C(const uint8_t* src_uyvy, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1753,10 +1931,10 @@ void UYVYToARGBRow_C(const uint8* src_uyvy, } } -void I422ToRGBARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToRGBARow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; @@ -1779,7 +1957,7 @@ void I422ToRGBARow_C(const uint8* src_y, } } -void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { +void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) { int x; for (x = 0; x < width - 1; x += 2) { YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); @@ -1795,7 +1973,7 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { } } -void MirrorRow_C(const uint8* src, uint8* dst, int width) { +void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { int x; src += width - 1; for (x = 0; x < width - 1; x += 2) { @@ -1808,7 +1986,10 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) { } } -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { +void MirrorUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { int x; src_uv += (width - 1) << 1; for (x = 0; x < width - 1; x += 2) { @@ -1824,10 +2005,10 @@ void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { } } -void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { int x; - const uint32* src32 = (const uint32*)(src); - uint32* dst32 = (uint32*)(dst); + const uint32_t* src32 = (const uint32_t*)(src); + uint32_t* dst32 = (uint32_t*)(dst); src32 += width - 1; for (x = 0; x < width - 1; x += 2) { dst32[x] = src32[0]; @@ -1839,7 +2020,10 @@ void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { } } -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { +void SplitUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_u[x] = src_uv[0]; @@ -1854,9 +2038,9 @@ void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { } } -void MergeUVRow_C(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_C(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -1872,10 +2056,10 @@ void MergeUVRow_C(const uint8* src_u, } } -void SplitRGBRow_C(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_C(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width) { int x; for (x = 0; x < width; ++x) { @@ -1886,10 +2070,10 @@ void SplitRGBRow_C(const uint8* src_rgb, } } -void MergeRGBRow_C(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_C(const uint8_t* 
src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { @@ -1905,9 +2089,9 @@ void MergeRGBRow_C(const uint8* src_r, // 64 = 10 bits // 16 = 12 bits // 1 = 16 bits -void MergeUVRow_16_C(const uint16* src_u, - const uint16* src_v, - uint16* dst_uv, +void MergeUVRow_16_C(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, int scale, int width) { int x; @@ -1924,8 +2108,8 @@ void MergeUVRow_16_C(const uint16* src_u, } } -void MultiplyRow_16_C(const uint16* src_y, - uint16* dst_y, +void MultiplyRow_16_C(const uint16_t* src_y, + uint16_t* dst_y, int scale, int width) { int x; @@ -1939,8 +2123,8 @@ void MultiplyRow_16_C(const uint16* src_y, // 16384 = 10 bits // 4096 = 12 bits // 256 = 16 bits -void Convert16To8Row_C(const uint16* src_y, - uint8* dst_y, +void Convert16To8Row_C(const uint16_t* src_y, + uint8_t* dst_y, int scale, int width) { int x; @@ -1951,8 +2135,8 @@ void Convert16To8Row_C(const uint16* src_y, // Use scale to convert lsb formats to msb, depending how many bits there are: // 1024 = 10 bits -void Convert8To16Row_C(const uint8* src_y, - uint16* dst_y, +void Convert8To16Row_C(const uint8_t* src_y, + uint16_t* dst_y, int scale, int width) { int x; @@ -1962,20 +2146,20 @@ void Convert8To16Row_C(const uint8* src_y, } } -void CopyRow_C(const uint8* src, uint8* dst, int count) { +void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) { memcpy(dst, src, count); } -void CopyRow_16_C(const uint16* src, uint16* dst, int count) { +void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) { memcpy(dst, src, count * 2); } -void SetRow_C(uint8* dst, uint8 v8, int width) { +void SetRow_C(uint8_t* dst, uint8_t v8, int width) { memset(dst, v8, width); } -void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) { - uint32* d = (uint32*)(dst_argb); +void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) { + uint32_t* d = (uint32_t*)(dst_argb); int x; for (x = 0; x < width; ++x) { d[x] = v32; @@ -1983,10 +2167,10 @@ void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) { } // Filter 2 rows of YUY2 UV's (422) into U and V (420). -void YUY2ToUVRow_C(const uint8* src_yuy2, +void YUY2ToUVRow_C(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { // Output a row of UV values, filtering 2 rows of YUY2. int x; @@ -2000,9 +2184,9 @@ void YUY2ToUVRow_C(const uint8* src_yuy2, } // Copy row of YUY2 UV's (422) into U and V (422). -void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_C(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { // Output a row of UV values. int x; @@ -2016,7 +2200,7 @@ void YUY2ToUV422Row_C(const uint8* src_yuy2, } // Copy row of YUY2 Y's (422) into Y (420/422). -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { +void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { // Output a row of Y values. int x; for (x = 0; x < width - 1; x += 2) { @@ -2030,10 +2214,10 @@ void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { } // Filter 2 rows of UYVY UV's (422) into U and V (420). -void UYVYToUVRow_C(const uint8* src_uyvy, +void UYVYToUVRow_C(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { // Output a row of UV values. 
int x; @@ -2047,9 +2231,9 @@ void UYVYToUVRow_C(const uint8* src_uyvy, } // Copy row of UYVY UV's (422) into U and V (422). -void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_C(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { // Output a row of UV values. int x; @@ -2063,7 +2247,7 @@ void UYVYToUV422Row_C(const uint8* src_uyvy, } // Copy row of UYVY Y's (422) into Y (420/422). -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { +void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { // Output a row of Y values. int x; for (x = 0; x < width - 1; x += 2) { @@ -2081,19 +2265,19 @@ void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { // Blend src_argb0 over src_argb1 and store to dst_argb. // dst_argb may be src_argb0 or src_argb1. // This code mimics the SSSE3 version for better testability. -void ARGBBlendRow_C(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBBlendRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint32 fb = src_argb0[0]; - uint32 fg = src_argb0[1]; - uint32 fr = src_argb0[2]; - uint32 a = src_argb0[3]; - uint32 bb = src_argb1[0]; - uint32 bg = src_argb1[1]; - uint32 br = src_argb1[2]; + uint32_t fb = src_argb0[0]; + uint32_t fg = src_argb0[1]; + uint32_t fr = src_argb0[2]; + uint32_t a = src_argb0[3]; + uint32_t bb = src_argb1[0]; + uint32_t bg = src_argb1[1]; + uint32_t br = src_argb1[2]; dst_argb[0] = BLEND(fb, bb, a); dst_argb[1] = BLEND(fg, bg, a); dst_argb[2] = BLEND(fr, br, a); @@ -2116,13 +2300,13 @@ void ARGBBlendRow_C(const uint8* src_argb0, } if (width & 1) { - uint32 fb = src_argb0[0]; - uint32 fg = src_argb0[1]; - uint32 fr = src_argb0[2]; - uint32 a = src_argb0[3]; - uint32 bb = src_argb1[0]; - uint32 bg = src_argb1[1]; - uint32 br = src_argb1[2]; + uint32_t fb = src_argb0[0]; + uint32_t fg = src_argb0[1]; + uint32_t fr = src_argb0[2]; + uint32_t a = src_argb0[3]; + uint32_t bb = src_argb1[0]; + uint32_t bg = src_argb1[1]; + uint32_t br = src_argb1[2]; dst_argb[0] = BLEND(fb, bb, a); dst_argb[1] = BLEND(fg, bg, a); dst_argb[2] = BLEND(fr, br, a); @@ -2132,10 +2316,10 @@ void ARGBBlendRow_C(const uint8* src_argb0, #undef BLEND #define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8 -void BlendPlaneRow_C(const uint8* src0, - const uint8* src1, - const uint8* alpha, - uint8* dst, +void BlendPlaneRow_C(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -2156,13 +2340,13 @@ void BlendPlaneRow_C(const uint8* src0, // Multiply source RGB by alpha and store to destination. // This code mimics the SSSE3 version for better testability. 
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width - 1; i += 2) { - uint32 b = src_argb[0]; - uint32 g = src_argb[1]; - uint32 r = src_argb[2]; - uint32 a = src_argb[3]; + uint32_t b = src_argb[0]; + uint32_t g = src_argb[1]; + uint32_t r = src_argb[2]; + uint32_t a = src_argb[3]; dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); @@ -2180,10 +2364,10 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { } if (width & 1) { - const uint32 b = src_argb[0]; - const uint32 g = src_argb[1]; - const uint32 r = src_argb[2]; - const uint32 a = src_argb[3]; + const uint32_t b = src_argb[0]; + const uint32_t g = src_argb[1]; + const uint32_t r = src_argb[2]; + const uint32_t a = src_argb[3]; dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); @@ -2199,7 +2383,7 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { // Reciprocal method is off by 1 on some values. ie 125 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. #define T(a) 0x01000000 + (0x10000 / a) -const uint32 fixed_invtbl8[256] = { +const uint32_t fixed_invtbl8[256] = { 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), @@ -2239,14 +2423,16 @@ const uint32 fixed_invtbl8[256] = { T(0xfc), T(0xfd), T(0xfe), 0x01000100}; #undef T -void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBUnattenuateRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { int i; for (i = 0; i < width; ++i) { - uint32 b = src_argb[0]; - uint32 g = src_argb[1]; - uint32 r = src_argb[2]; - const uint32 a = src_argb[3]; - const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point + uint32_t b = src_argb[0]; + uint32_t g = src_argb[1]; + uint32_t r = src_argb[2]; + const uint32_t a = src_argb[3]; + const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point b = (b * ia) >> 8; g = (g * ia) >> 8; r = (r * ia) >> 8; @@ -2260,11 +2446,11 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { } } -void ComputeCumulativeSumRow_C(const uint8* row, - int32* cumsum, - const int32* previous_cumsum, +void ComputeCumulativeSumRow_C(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, int width) { - int32 row_sum[4] = {0, 0, 0, 0}; + int32_t row_sum[4] = {0, 0, 0, 0}; int x; for (x = 0; x < width; ++x) { row_sum[0] += row[x * 4 + 0]; @@ -2278,19 +2464,19 @@ void ComputeCumulativeSumRow_C(const uint8* row, } } -void CumulativeSumToAverageRow_C(const int32* tl, - const int32* bl, +void CumulativeSumToAverageRow_C(const int32_t* tl, + const int32_t* bl, int w, int area, - uint8* dst, + uint8_t* dst, int count) { float ooa = 1.0f / area; int i; for (i = 0; i < count; ++i) { - dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); - dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); - dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); - dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); + dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); + dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); + dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); + 
dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); dst += 4; tl += 4; bl += 4; @@ -2299,9 +2485,9 @@ void CumulativeSumToAverageRow_C(const int32* tl, // Copy pixels from rotated source to destination row with a slope. LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, +void ARGBAffineRow_C(const uint8_t* src_argb, int src_argb_stride, - uint8* dst_argb, + uint8_t* dst_argb, const float* uv_dudv, int width) { int i; @@ -2312,8 +2498,8 @@ void ARGBAffineRow_C(const uint8* src_argb, for (i = 0; i < width; ++i) { int x = (int)(uv[0]); int y = (int)(uv[1]); - *(uint32*)(dst_argb) = - *(const uint32*)(src_argb + y * src_argb_stride + x * 4); + *(uint32_t*)(dst_argb) = + *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4); dst_argb += 4; uv[0] += uv_dudv[2]; uv[1] += uv_dudv[3]; @@ -2321,9 +2507,9 @@ void ARGBAffineRow_C(const uint8* src_argb, } // Blend 2 rows into 1. -static void HalfRow_C(const uint8* src_uv, +static void HalfRow_C(const uint8_t* src_uv, ptrdiff_t src_uv_stride, - uint8* dst_uv, + uint8_t* dst_uv, int width) { int x; for (x = 0; x < width; ++x) { @@ -2331,9 +2517,9 @@ static void HalfRow_C(const uint8* src_uv, } } -static void HalfRow_16_C(const uint16* src_uv, +static void HalfRow_16_C(const uint16_t* src_uv, ptrdiff_t src_uv_stride, - uint16* dst_uv, + uint16_t* dst_uv, int width) { int x; for (x = 0; x < width; ++x) { @@ -2342,14 +2528,14 @@ static void HalfRow_16_C(const uint16* src_uv, } // C version 2x2 -> 2x1. -void InterpolateRow_C(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int width, int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr1 = src_ptr + src_stride; int x; if (y1_fraction == 0) { memcpy(dst_ptr, src_ptr, width); @@ -2374,14 +2560,14 @@ void InterpolateRow_C(uint8* dst_ptr, } } -void InterpolateRow_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void InterpolateRow_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, ptrdiff_t src_stride, int width, int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; - const uint16* src_ptr1 = src_ptr + src_stride; + const uint16_t* src_ptr1 = src_ptr + src_stride; int x; if (source_y_fraction == 0) { memcpy(dst_ptr, src_ptr, width * 2); @@ -2404,9 +2590,9 @@ void InterpolateRow_16_C(uint16* dst_ptr, } // Use first 4 shuffler values to reorder ARGB channels. -void ARGBShuffleRow_C(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { int index0 = shuffler[0]; int index1 = shuffler[1]; @@ -2416,10 +2602,10 @@ void ARGBShuffleRow_C(const uint8* src_argb, int x; for (x = 0; x < width; ++x) { // To support in-place conversion. 
- uint8 b = src_argb[index0]; - uint8 g = src_argb[index1]; - uint8 r = src_argb[index2]; - uint8 a = src_argb[index3]; + uint8_t b = src_argb[index0]; + uint8_t g = src_argb[index1]; + uint8_t r = src_argb[index2]; + uint8_t a = src_argb[index3]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; @@ -2429,10 +2615,10 @@ void ARGBShuffleRow_C(const uint8* src_argb, } } -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, +void I422ToYUY2Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -2453,10 +2639,10 @@ void I422ToYUY2Row_C(const uint8* src_y, } } -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, +void I422ToUYVYRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -2477,8 +2663,8 @@ void I422ToUYVYRow_C(const uint8* src_y, } } -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, +void ARGBPolynomialRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width) { int i; @@ -2508,10 +2694,10 @@ void ARGBPolynomialRow_C(const uint8* src_argb, dr += poly[14] * r3; da += poly[15] * a3; - dst_argb[0] = Clamp((int32)(db)); - dst_argb[1] = Clamp((int32)(dg)); - dst_argb[2] = Clamp((int32)(dr)); - dst_argb[3] = Clamp((int32)(da)); + dst_argb[0] = Clamp((int32_t)(db)); + dst_argb[1] = Clamp((int32_t)(dg)); + dst_argb[2] = Clamp((int32_t)(dr)); + dst_argb[3] = Clamp((int32_t)(da)); src_argb += 4; dst_argb += 4; } @@ -2527,31 +2713,34 @@ void ARGBPolynomialRow_C(const uint8* src_argb, // simply extract the low bits of the exponent and the high // bits of the mantissa from our float and we're done. -void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width) { +void HalfFloatRow_C(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { int i; float mult = 1.9259299444e-34f * scale; for (i = 0; i < width; ++i) { float value = src[i] * mult; - dst[i] = (uint16)((*(uint32_t*)&value) >> 13); + dst[i] = (uint16_t)((*(uint32_t*)&value) >> 13); } } -void ARGBLumaColorTableRow_C(const uint8* src_argb, - uint8* dst_argb, +void ARGBLumaColorTableRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, - uint32 lumacoeff) { - uint32 bc = lumacoeff & 0xff; - uint32 gc = (lumacoeff >> 8) & 0xff; - uint32 rc = (lumacoeff >> 16) & 0xff; + const uint8_t* luma, + uint32_t lumacoeff) { + uint32_t bc = lumacoeff & 0xff; + uint32_t gc = (lumacoeff >> 8) & 0xff; + uint32_t rc = (lumacoeff >> 16) & 0xff; int i; for (i = 0; i < width - 1; i += 2) { // Luminance in rows, color values in columns. - const uint8* luma0 = + const uint8_t* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + luma; - const uint8* luma1; + const uint8_t* luma1; dst_argb[0] = luma0[src_argb[0]]; dst_argb[1] = luma0[src_argb[1]]; dst_argb[2] = luma0[src_argb[2]]; @@ -2568,7 +2757,7 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb, } if (width & 1) { // Luminance in rows, color values in columns. 
- const uint8* luma0 = + const uint8_t* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + luma; dst_argb[0] = luma0[src_argb[0]]; @@ -2578,7 +2767,7 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb, } } -void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { +void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst[3] = src[3]; @@ -2591,7 +2780,7 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { } } -void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) { +void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst_a[0] = src_argb[3]; @@ -2604,7 +2793,7 @@ void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) { } } -void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { +void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst[3] = src[0]; @@ -2623,13 +2812,13 @@ void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { #if !(defined(_MSC_VER) && defined(_M_IX86)) && \ defined(HAS_I422TORGB565ROW_SSSE3) // row_win.cc has asm version, but GCC uses 2 step wrapper. -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2644,14 +2833,14 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y, #endif #if defined(HAS_I422TOARGB1555ROW_SSSE3) -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2666,14 +2855,14 @@ void I422ToARGB1555Row_SSSE3(const uint8* src_y, #endif #if defined(HAS_I422TOARGB4444ROW_SSSE3) -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2688,13 +2877,13 @@ void I422ToARGB4444Row_SSSE3(const uint8* src_y, #endif #if defined(HAS_NV12TORGB565ROW_SSSE3) -void NV12ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); @@ -2708,13 +2897,13 @@ void NV12ToRGB565Row_SSSE3(const uint8* src_y, #endif #if defined(HAS_I422TORGB565ROW_AVX2) -void I422ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2733,14 +2922,14 @@ void I422ToRGB565Row_AVX2(const uint8* src_y, #endif #if defined(HAS_I422TOARGB1555ROW_AVX2) -void I422ToARGB1555Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2759,14 +2948,14 @@ void I422ToARGB1555Row_AVX2(const uint8* src_y, #endif #if defined(HAS_I422TOARGB4444ROW_AVX2) -void I422ToARGB4444Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2785,14 +2974,14 @@ void I422ToARGB4444Row_AVX2(const uint8* src_y, #endif #if defined(HAS_I422TORGB24ROW_AVX2) -void I422ToRGB24Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2808,13 +2997,13 @@ void I422ToRGB24Row_AVX2(const uint8* src_y, #endif #if defined(HAS_NV12TORGB565ROW_AVX2) -void NV12ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); @@ -2864,7 +3053,7 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) { } } -void GaussRow_C(const uint32* src, uint16* dst, int width) { +void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) { int i; for (i = 0; i < width; ++i) { *dst++ = @@ -2874,12 +3063,12 @@ void GaussRow_C(const uint32* src, uint16* dst, int width) { } // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. -void GaussCol_C(const uint16* src0, - const uint16* src1, - const uint16* src2, - const uint16* src3, - const uint16* src4, - uint32* dst, +void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, int width) { int i; for (i = 0; i < width; ++i) { diff --git a/chromium/third_party/libyuv/source/row_gcc.cc b/chromium/third_party/libyuv/source/row_gcc.cc index 0dc126678e0..95845c2592f 100644 --- a/chromium/third_party/libyuv/source/row_gcc.cc +++ b/chromium/third_party/libyuv/source/row_gcc.cc @@ -152,392 +152,399 @@ static const lvec8 kShuffleNV21 = { #endif // HAS_RGB24TOARGBROW_SSSE3 #ifdef HAS_J400TOARGBROW_SSE2 -void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm1 \n" - "por %%xmm5,%%xmm0 \n" - "por %%xmm5,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm5" - ); +void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm0,%%xmm0 \n" + "punpckhwd %%xmm1,%%xmm1 \n" + "por %%xmm5,%%xmm0 \n" + "por %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_J400TOARGBROW_SSE2 #ifdef HAS_RGB24TOARGBROW_SSSE3 -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 - "pslld $0x18,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) 
",%%xmm3 \n" - "lea " MEMLEA(0x30,0) ",%0 \n" - "movdqa %%xmm3,%%xmm2 \n" - "palignr $0x8,%%xmm1,%%xmm2 \n" - "pshufb %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm2 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "por %%xmm5,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "palignr $0x4,%%xmm3,%%xmm3 \n" - "pshufb %%xmm4,%%xmm3 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRGB24ToARGB) // %3 - : "memory", "cc" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000 + "pslld $0x18,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm3 \n" + "lea 0x30(%0),%0 \n" + "movdqa %%xmm3,%%xmm2 \n" + "palignr $0x8,%%xmm1,%%xmm2 \n" + "pshufb %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm2 \n" + "palignr $0xc,%%xmm0,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "movdqu %%xmm2,0x20(%1) \n" + "por %%xmm5,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "palignr $0x4,%%xmm3,%%xmm3 \n" + "pshufb %%xmm4,%%xmm3 \n" + "movdqu %%xmm1,0x10(%1) \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm3,0x30(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_rgb24), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRGB24ToARGB) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 - "pslld $0x18,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n" - "lea " MEMLEA(0x30,0) ",%0 \n" - "movdqa %%xmm3,%%xmm2 \n" - "palignr $0x8,%%xmm1,%%xmm2 \n" - "pshufb %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm2 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "por %%xmm5,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "palignr $0x4,%%xmm3,%%xmm3 \n" - "pshufb %%xmm4,%%xmm3 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRAWToARGB) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000 + "pslld $0x18,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm3 \n" + "lea 0x30(%0),%0 \n" + "movdqa %%xmm3,%%xmm2 \n" + "palignr $0x8,%%xmm1,%%xmm2 \n" + "pshufb %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm2 \n" + "palignr $0xc,%%xmm0,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "movdqu %%xmm2,0x20(%1) \n" + "por %%xmm5,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 
\n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "palignr $0x4,%%xmm3,%%xmm3 \n" + "pshufb %%xmm4,%%xmm3 \n" + "movdqu %%xmm1,0x10(%1) \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm3,0x30(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRAWToARGB) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width) { - asm volatile ( - "movdqa %3,%%xmm3 \n" - "movdqa %4,%%xmm4 \n" - "movdqa %5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x4,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm2 \n" - "lea " MEMLEA(0x18,0) ",%0 \n" - "pshufb %%xmm3,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "pshufb %%xmm5,%%xmm2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movq %%xmm1," MEMACCESS2(0x8,1) " \n" - "movq %%xmm2," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_rgb24), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRAWToRGB24_0), // %3 - "m"(kShuffleMaskRAWToRGB24_1), // %4 - "m"(kShuffleMaskRAWToRGB24_2) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, + uint8_t* dst_rgb24, + int width) { + asm volatile( + "movdqa %3,%%xmm3 \n" + "movdqa %4,%%xmm4 \n" + "movdqa %5,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x4(%0),%%xmm1 \n" + "movdqu 0x8(%0),%%xmm2 \n" + "lea 0x18(%0),%0 \n" + "pshufb %%xmm3,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "pshufb %%xmm5,%%xmm2 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x8(%1) \n" + "movq %%xmm2,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_rgb24), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRAWToRGB24_0), // %3 + "m"(kShuffleMaskRAWToRGB24_1), // %4 + "m"(kShuffleMaskRAWToRGB24_2) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "mov $0x1080108,%%eax \n" - "movd %%eax,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x20802080,%%eax \n" - "movd %%eax,%%xmm6 \n" - "pshufd $0x0,%%xmm6,%%xmm6 \n" - "pcmpeqb %%xmm3,%%xmm3 \n" - "psllw $0xb,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0xa,%%xmm4 \n" - "psrlw $0x5,%%xmm4 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "psllw $0x8,%%xmm7 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm3,%%xmm1 \n" - "psllw $0xb,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "psllw $0x8,%%xmm1 \n" - "por %%xmm2,%%xmm1 \n" - "pand %%xmm4,%%xmm0 \n" - "pmulhuw %%xmm6,%%xmm0 \n" - "por %%xmm7,%%xmm0 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2) - MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); +void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "mov $0x1080108,%%eax \n" + "movd %%eax,%%xmm5 \n" + "pshufd 
$0x0,%%xmm5,%%xmm5 \n" + "mov $0x20802080,%%eax \n" + "movd %%eax,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psllw $0xb,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $0xa,%%xmm4 \n" + "psrlw $0x5,%%xmm4 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "psllw $0x8,%%xmm7 \n" + "sub %0,%1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm3,%%xmm1 \n" + "psllw $0xb,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "psllw $0x8,%%xmm1 \n" + "por %%xmm2,%%xmm1 \n" + "pand %%xmm4,%%xmm0 \n" + "pmulhuw %%xmm6,%%xmm0 \n" + "por %%xmm7,%%xmm0 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,0x00(%1,%0,2) \n" + "movdqu %%xmm2,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", + "xmm6", "xmm7"); } -void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "mov $0x1080108,%%eax \n" - "movd %%eax,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x42004200,%%eax \n" - "movd %%eax,%%xmm6 \n" - "pshufd $0x0,%%xmm6,%%xmm6 \n" - "pcmpeqb %%xmm3,%%xmm3 \n" - "psllw $0xb,%%xmm3 \n" - "movdqa %%xmm3,%%xmm4 \n" - "psrlw $0x6,%%xmm4 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "psllw $0x8,%%xmm7 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psllw $0x1,%%xmm1 \n" - "psllw $0xb,%%xmm2 \n" - "pand %%xmm3,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "psllw $0x8,%%xmm1 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm4,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "pmulhuw %%xmm6,%%xmm0 \n" - "pand %%xmm7,%%xmm2 \n" - "por %%xmm2,%%xmm0 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2) - MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); +void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "mov $0x1080108,%%eax \n" + "movd %%eax,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "mov $0x42004200,%%eax \n" + "movd %%eax,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psllw $0xb,%%xmm3 \n" + "movdqa %%xmm3,%%xmm4 \n" + "psrlw $0x6,%%xmm4 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "psllw $0x8,%%xmm7 \n" + "sub %0,%1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "psllw $0x1,%%xmm1 \n" + "psllw $0xb,%%xmm2 \n" + "pand %%xmm3,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "psllw $0x8,%%xmm1 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm4,%%xmm0 \n" + "psraw $0x8,%%xmm2 \n" + "pmulhuw %%xmm6,%%xmm0 \n" + "pand %%xmm7,%%xmm2 \n" + "por %%xmm2,%%xmm0 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,0x00(%1,%0,2) \n" + "movdqu %%xmm2,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : 
"+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", + "xmm6", "xmm7"); } -void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "mov $0xf0f0f0f,%%eax \n" - "movd %%eax,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "pslld $0x4,%%xmm5 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm4,%%xmm0 \n" - "pand %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm3 \n" - "psllw $0x4,%%xmm1 \n" - "psrlw $0x4,%%xmm3 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm3,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2) - MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "mov $0xf0f0f0f,%%eax \n" + "movd %%eax,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "pslld $0x4,%%xmm5 \n" + "sub %0,%1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm4,%%xmm0 \n" + "pand %%xmm5,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm3 \n" + "psllw $0x4,%%xmm1 \n" + "psrlw $0x4,%%xmm3 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm3,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,0x00(%1,%0,2) \n" + "movdqu %%xmm1,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int width) { - asm volatile ( - "movdqa %3,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por %%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x30,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskARGBToRGB24) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); +void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + + "movdqa %3,%%xmm6 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "pshufb %%xmm6,%%xmm0 \n" + "pshufb %%xmm6,%%xmm1 \n" + "pshufb %%xmm6,%%xmm2 \n" + "pshufb %%xmm6,%%xmm3 \n" + "movdqa 
%%xmm1,%%xmm4 \n" + "psrldq $0x4,%%xmm1 \n" + "pslldq $0xc,%%xmm4 \n" + "movdqa %%xmm2,%%xmm5 \n" + "por %%xmm4,%%xmm0 \n" + "pslldq $0x8,%%xmm5 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "psrldq $0x8,%%xmm2 \n" + "pslldq $0x4,%%xmm3 \n" + "por %%xmm3,%%xmm2 \n" + "movdqu %%xmm1,0x10(%1) \n" + "movdqu %%xmm2,0x20(%1) \n" + "lea 0x30(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskARGBToRGB24) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } -void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int width) { - asm volatile ( - "movdqa %3,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por %%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x30,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskARGBToRAW) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); +void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + + "movdqa %3,%%xmm6 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "pshufb %%xmm6,%%xmm0 \n" + "pshufb %%xmm6,%%xmm1 \n" + "pshufb %%xmm6,%%xmm2 \n" + "pshufb %%xmm6,%%xmm3 \n" + "movdqa %%xmm1,%%xmm4 \n" + "psrldq $0x4,%%xmm1 \n" + "pslldq $0xc,%%xmm4 \n" + "movdqa %%xmm2,%%xmm5 \n" + "por %%xmm4,%%xmm0 \n" + "pslldq $0x8,%%xmm5 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "psrldq $0x8,%%xmm2 \n" + "pslldq $0x4,%%xmm3 \n" + "por %%xmm3,%%xmm2 \n" + "movdqu %%xmm1,0x10(%1) \n" + "movdqu %%xmm2,0x20(%1) \n" + "lea 0x30(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskARGBToRAW) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } -void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm3,%%xmm3 \n" - "psrld $0x1b,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrld $0x1a,%%xmm4 \n" - "pslld $0x5,%%xmm4 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0xb,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pslld $0x8,%%xmm0 \n" - "psrld $0x3,%%xmm1 \n" - "psrld $0x5,%%xmm2 \n" - "psrad $0x10,%%xmm0 \n" - "pand %%xmm3,%%xmm1 \n" - "pand %%xmm4,%%xmm2 \n" - "pand %%xmm5,%%xmm0 \n" - "por %%xmm2,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void 
ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm3,%%xmm3 \n" + "psrld $0x1b,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1a,%%xmm4 \n" + "pslld $0x5,%%xmm4 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0xb,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pslld $0x8,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x5,%%xmm2 \n" + "psrad $0x10,%%xmm0 \n" + "pand %%xmm3,%%xmm1 \n" + "pand %%xmm4,%%xmm2 \n" + "pand %%xmm5,%%xmm0 \n" + "por %%xmm2,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ARGBToRGB565DitherRow_SSE2(const uint8* src, - uint8* dst, - const uint32 dither4, +void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, int width) { asm volatile( "movd %3,%%xmm6 \n" @@ -583,9 +590,9 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src, } #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 -void ARGBToRGB565DitherRow_AVX2(const uint8* src, - uint8* dst, - const uint32 dither4, +void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, int width) { asm volatile( "vbroadcastss %3,%%xmm6 \n" @@ -628,75 +635,74 @@ void ARGBToRGB565DitherRow_AVX2(const uint8* src, } #endif // HAS_ARGBTORGB565DITHERROW_AVX2 -void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrld $0x1b,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "pslld $0x5,%%xmm5 \n" - "movdqa %%xmm4,%%xmm6 \n" - "pslld $0xa,%%xmm6 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "pslld $0xf,%%xmm7 \n" +void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1b,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "pslld $0x5,%%xmm5 \n" + "movdqa %%xmm4,%%xmm6 \n" + "pslld $0xa,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "pslld $0xf,%%xmm7 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "psrad $0x10,%%xmm0 \n" - "psrld $0x3,%%xmm1 \n" - "psrld $0x6,%%xmm2 \n" - "psrld $0x9,%%xmm3 \n" - "pand %%xmm7,%%xmm0 \n" - "pand %%xmm4,%%xmm1 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm6,%%xmm3 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - :: "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "psrad $0x10,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x6,%%xmm2 \n" + "psrld $0x9,%%xmm3 \n" + "pand %%xmm7,%%xmm0 \n" + "pand %%xmm4,%%xmm1 \n" + "pand %%xmm5,%%xmm2 \n" + "pand %%xmm6,%%xmm3 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", 
"xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } -void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0xc,%%xmm4 \n" - "movdqa %%xmm4,%%xmm3 \n" - "psrlw $0x8,%%xmm3 \n" +void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $0xc,%%xmm4 \n" + "movdqa %%xmm4,%%xmm3 \n" + "psrlw $0x8,%%xmm3 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm3,%%xmm0 \n" - "pand %%xmm4,%%xmm1 \n" - "psrlq $0x4,%%xmm0 \n" - "psrlq $0x8,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm3,%%xmm0 \n" + "pand %%xmm4,%%xmm1 \n" + "psrlq $0x4,%%xmm0 \n" + "psrlq $0x8,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_RGB24TOARGBROW_SSSE3 @@ -724,12 +730,16 @@ result left 10 to position the A and G channels. // Shuffle table for converting RAW to RGB24. Last 8. static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u, 128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u}; -static const uint32 kMulRB10 = 1028 * 16 * 65536 + 1028; -static const uint32 kMaskRB10 = 0x3ff003ff; -static const uint32 kMaskAG10 = 0xc000ff00; -static const uint32 kMulAG10 = 64 * 65536 + 1028; -void ARGBToAR30Row_SSSE3(const uint8* src, uint8* dst, int width) { +static const uvec8 kShuffleBR30 = {128u, 2u, 128u, 0u, 128u, 6u, 128u, 4u, + 128u, 10u, 128u, 8u, 128u, 14u, 128u, 12u}; + +static const uint32_t kMulRB10 = 1028 * 16 * 65536 + 1028; +static const uint32_t kMaskRB10 = 0x3ff003ff; +static const uint32_t kMaskAG10 = 0xc000ff00; +static const uint32_t kMulAG10 = 64 * 65536 + 1028; + +void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "movdqa %3,%%xmm2 \n" // shuffler for RB "movd %4,%%xmm3 \n" // multipler for RB @@ -768,9 +778,47 @@ void ARGBToAR30Row_SSSE3(const uint8* src, uint8* dst, int width) { : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } -#ifdef HAS_ARGBTOAR30ROW_AVX2 +void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "movdqa %3,%%xmm2 \n" // shuffler for RB + "movd %4,%%xmm3 \n" // multipler for RB + "movd %5,%%xmm4 \n" // mask for R10 B10 + "movd %6,%%xmm5 \n" // mask for AG + "movd %7,%%xmm6 \n" // multipler for AG + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "sub %0,%1 \n" + + "1: \n" + "movdqu (%0),%%xmm0 \n" // fetch 4 ABGR pixels + "movdqa %%xmm0,%%xmm1 \n" + "pshufb %%xmm2,%%xmm1 \n" // R0B0 + "pand %%xmm5,%%xmm0 \n" // A0G0 + "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10 + "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10 + "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10 + "pslld $10,%%xmm0 \n" // A2 x10 G10 x10 + "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10 + "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels + "add 
$0x10,%0 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleBR30), // %3 reversed shuffler + "m"(kMulRB10), // %4 + "m"(kMaskRB10), // %5 + "m"(kMaskAG10), // %6 + "m"(kMulAG10) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); +} -void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) { +#ifdef HAS_ARGBTOAR30ROW_AVX2 +void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB "vbroadcastss %4,%%ymm3 \n" // multipler for RB @@ -806,81 +854,116 @@ void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) { } #endif +#ifdef HAS_ABGRTOAR30ROW_AVX2 +void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB + "vbroadcastss %4,%%ymm3 \n" // multipler for RB + "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10 + "vbroadcastss %6,%%ymm5 \n" // mask for AG + "vbroadcastss %7,%%ymm6 \n" // multipler for AG + "sub %0,%1 \n" + + "1: \n" + "vmovdqu (%0),%%ymm0 \n" // fetch 8 ABGR pixels + "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0 + "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0 + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10 + "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10 + "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10 + "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10 + "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10 + "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels + "add $0x20,%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleBR30), // %3 reversed shuffler + "m"(kMulRB10), // %4 + "m"(kMaskRB10), // %5 + "m"(kMaskAG10), // %6 + "m"(kMulAG10) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); +} +#endif + #ifdef HAS_ARGBTOYROW_SSSE3 // Convert 16 ARGB pixels (64 bytes) to 16 Y values. 
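// For orientation before the Y row functions: the luma math below is a
// per-pixel weighted sum in 7-bit fixed point. A minimal scalar sketch,
// assuming BT.601 weights scaled by 128 (roughly 13/65/33 for B/G/R --
// the exact bytes live in kARGBToY; the helper name here is illustrative,
// not part of this source):
#include <stdint.h>
enum { kYB = 13, kYG = 65, kYR = 33 };  // assumed kARGBToY values
// One limited-range Y sample from a B,G,R,A byte quad:
// pmaddubsw/phaddw accumulate kYB*B + kYG*G + kYR*R per pixel,
// psrlw $0x7 drops the 7-bit fraction, paddb adds the +16 bias.
static uint8_t ARGBPixelToY(const uint8_t* bgra) {
  int y = (kYB * bgra[0] + kYG * bgra[1] + kYR * bgra[2]) >> 7;
  return (uint8_t)(y + 16);
}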
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" +void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYJROW_SSSE3 // Convert 16 ARGB pixels (64 bytes) to 16 YJ values. // Same as ARGBToYRow but different coefficients, no add 16, but do rounding. 
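// The J variant changes only the constants: full-range weights and a +64
// rounding term added before the shift, with no +16 bias afterwards. A
// scalar sketch, assuming kARGBToYJ holds 15/75/38 for B/G/R (which sum
// to 128) and kAddYJ64 holds 64:
#include <stdint.h>
static uint8_t ARGBPixelToYJ(const uint8_t* bgra) {
  // paddw adds 64 (half of 1 << 7) before psrlw $0x7, rounding to nearest.
  return (uint8_t)((15 * bgra[0] + 75 * bgra[1] + 38 * bgra[2] + 64) >> 7);
}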
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" +void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "paddw %%xmm5,%%xmm0 \n" - "paddw %%xmm5,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "paddw %%xmm5,%%xmm0 \n" + "paddw %%xmm5,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToYJ), // %3 + "m"(kAddYJ64) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBTOYJROW_SSSE3 @@ -889,153 +972,149 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7}; // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "vbroadcastf128 %3,%%ymm4 \n" - "vbroadcastf128 %4,%%ymm5 \n" - "vmovdqu %5,%%ymm6 \n" +void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm4 \n" + "vbroadcastf128 %4,%%ymm5 \n" + "vmovdqu %5,%%ymm6 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "lea " MEMLEA(0x80,0) ",%0 \n" - "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. - "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" - "vpsrlw $0x7,%%ymm0,%%ymm0 \n" - "vpsrlw $0x7,%%ymm2,%%ymm2 \n" - "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. - "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. 
- "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToY), // %3 - "m"(kAddY16), // %4 - "m"(kPermdARGBToY_AVX) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. + "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" + "vpsrlw $0x7,%%ymm0,%%ymm0 \n" + "vpsrlw $0x7,%%ymm2,%%ymm2 \n" + "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. + "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. + "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToY), // %3 + "m"(kAddY16), // %4 + "m"(kPermdARGBToY_AVX) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "vbroadcastf128 %3,%%ymm4 \n" - "vbroadcastf128 %4,%%ymm5 \n" - "vmovdqu %5,%%ymm6 \n" +void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm4 \n" + "vbroadcastf128 %4,%%ymm5 \n" + "vmovdqu %5,%%ymm6 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "lea " MEMLEA(0x80,0) ",%0 \n" - "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. - "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" - "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding. - "vpaddw %%ymm5,%%ymm2,%%ymm2 \n" - "vpsrlw $0x7,%%ymm0,%%ymm0 \n" - "vpsrlw $0x7,%%ymm2,%%ymm2 \n" - "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. - "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64), // %4 - "m"(kPermdARGBToY_AVX) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. + "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" + "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding. + "vpaddw %%ymm5,%%ymm2,%%ymm2 \n" + "vpsrlw $0x7,%%ymm0,%%ymm0 \n" + "vpsrlw $0x7,%%ymm2,%%ymm2 \n" + "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. 
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToYJ), // %3 + "m"(kAddYJ64), // %4 + "m"(kPermdARGBToY_AVX) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBTOYJROW_AVX2 #ifdef HAS_ARGBTOUVROW_SSSE3 -void ARGBToUVRow_SSSE3(const uint8* src_argb0, +void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kARGBToV), // %5 - "m"(kARGBToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 
\n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kARGBToV), // %5 + "m"(kARGBToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVROW_SSSE3 @@ -1044,659 +1123,644 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, static const lvec8 kShufARGBToUV_AVX = { 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15}; -void ARGBToUVRow_AVX2(const uint8* src_argb0, +void ARGBToUVRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vbroadcastf128 %5,%%ymm5 \n" - "vbroadcastf128 %6,%%ymm6 \n" - "vbroadcastf128 %7,%%ymm7 \n" - "sub %1,%2 \n" + asm volatile( + "vbroadcastf128 %5,%%ymm5 \n" + "vbroadcastf128 %6,%%ymm6 \n" + "vbroadcastf128 %7,%%ymm7 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2) - VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3) - "lea " MEMLEA(0x80,0) ",%0 \n" - "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" - "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" - "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" - "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" - "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" - "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" - - "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" - "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" - "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" - "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpsraw $0x8,%%ymm1,%%ymm1 \n" - "vpsraw $0x8,%%ymm0,%%ymm0 \n" - "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpshufb %8,%%ymm0,%%ymm0 \n" - "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" - - "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kAddUV128), // %5 - "m"(kARGBToV), // %6 - "m"(kARGBToU), // %7 - "m"(kShufARGBToUV_AVX) // %8 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n" + "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" + "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" + "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" + "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" + "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" + "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" + + "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" + "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" + "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" + "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpsraw $0x8,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm0,%%ymm0 \n" + "vpacksswb 
%%ymm0,%%ymm1,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpshufb %8,%%ymm0,%%ymm0 \n" + "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" + + "vextractf128 $0x0,%%ymm0,(%1) \n" + "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kAddUV128), // %5 + "m"(kARGBToV), // %6 + "m"(kARGBToU), // %7 + "m"(kShufARGBToUV_AVX) // %8 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJROW_AVX2 -void ARGBToUVJRow_AVX2(const uint8* src_argb0, +void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vbroadcastf128 %5,%%ymm5 \n" - "vbroadcastf128 %6,%%ymm6 \n" - "vbroadcastf128 %7,%%ymm7 \n" - "sub %1,%2 \n" + asm volatile( + "vbroadcastf128 %5,%%ymm5 \n" + "vbroadcastf128 %6,%%ymm6 \n" + "vbroadcastf128 %7,%%ymm7 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2) - VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3) - "lea " MEMLEA(0x80,0) ",%0 \n" - "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" - "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" - "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" - "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" - "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" - "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" - - "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" - "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" - "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" - "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm5,%%ymm1,%%ymm1 \n" - "vpsraw $0x8,%%ymm1,%%ymm1 \n" - "vpsraw $0x8,%%ymm0,%%ymm0 \n" - "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpshufb %8,%%ymm0,%%ymm0 \n" - - "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kAddUVJ128), // %5 - "m"(kARGBToVJ), // %6 - "m"(kARGBToUJ), // %7 - "m"(kShufARGBToUV_AVX) // %8 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n" + "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" + "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" + "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" + "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" + "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" + "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" + + "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" + "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" + "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" + "vphaddw 
%%ymm3,%%ymm1,%%ymm1 \n" + "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm0,%%ymm0 \n" + "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpshufb %8,%%ymm0,%%ymm0 \n" + + "vextractf128 $0x0,%%ymm0,(%1) \n" + "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kAddUVJ128), // %5 + "m"(kARGBToVJ), // %6 + "m"(kARGBToUJ), // %7 + "m"(kShufARGBToUV_AVX) // %8 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBTOUVJROW_AVX2 #ifdef HAS_ARGBTOUVJROW_SSSE3 -void ARGBToUVJRow_SSSE3(const uint8* src_argb0, +void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "paddw %%xmm5,%%xmm0 \n" - "paddw %%xmm5,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kARGBToVJ), // %5 - "m"(kARGBToUJ), // %6 - "m"(kAddUVJ128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + 
"pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "paddw %%xmm5,%%xmm0 \n" + "paddw %%xmm5,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kARGBToVJ), // %5 + "m"(kARGBToUJ), // %6 + "m"(kAddUVJ128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVJROW_SSSE3 #ifdef HAS_ARGBTOUV444ROW_SSSE3 -void ARGBToUV444Row_SSSE3(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movdqa %4,%%xmm3 \n" - "movdqa %5,%%xmm4 \n" - "movdqa %6,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "movdqa %4,%%xmm3 \n" + "movdqa %5,%%xmm4 \n" + "movdqa %6,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm3,%%xmm0 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "m"(kARGBToV), // %4 - "m"(kARGBToU), // %5 - "m"(kAddUV128) // %6 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm6 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm6,%%xmm2 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm2 \n" + "packsswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "pmaddubsw %%xmm3,%%xmm0 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm6,%%xmm2 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm2 \n" + "packsswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "lea 0x40(%0),%0 \n" + "movdqu %%xmm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%3 
\n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "m"(kARGBToV), // %4 + "m"(kARGBToU), // %5 + "m"(kAddUV128) // %6 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6"); } #endif // HAS_ARGBTOUV444ROW_SSSE3 -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" +void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %4,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_bgra), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kBGRAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_bgra), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kBGRAToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void BGRAToUVRow_SSSE3(const uint8* src_bgra0, +void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0, int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 
\n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_bgra0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_bgra)), // %4 - "m"(kBGRAToV), // %5 - "m"(kBGRAToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); -} + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_bgra0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_bgra)), // %4 + "m"(kBGRAToV), // %5 + "m"(kBGRAToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); +} + +void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %4,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_abgr), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kABGRToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + 
: "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kABGRToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" +void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %4,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kRGBAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kRGBAToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ABGRToUVRow_SSSE3(const uint8* src_abgr0, +void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0, int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps 
%%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_abgr0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_abgr)), // %4 - "m"(kABGRToV), // %5 - "m"(kABGRToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); -} + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" -void RGBAToUVRow_SSSE3(const uint8* src_rgba0, + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_abgr0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_abgr)), // %4 + "m"(kABGRToV), // %5 + "m"(kABGRToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); +} + +void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0, int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - 
"lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_rgba0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_rgba)), // %4 - "m"(kRGBAToV), // %5 - "m"(kRGBAToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_rgba0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_rgba)), // %4 + "m"(kRGBAToV), // %5 + "m"(kRGBAToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) // Read 8 UV from 444 -#define READYUV444 \ - "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READYUV444 \ + "movq (%[u_buf]),%%xmm0 \n" \ + "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422, upsample to 8 UV -#define READYUV422 \ - "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READYUV422 \ + "movd (%[u_buf]),%%xmm0 \n" \ + "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x4(%[u_buf]),%[u_buf] \n" \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422 10 bit, upsample to 8 UV // TODO(fbarchard): Consider shufb to replace pack/unpack // TODO(fbarchard): Consider pmulhuw to replace psraw // TODO(fbarchard): Consider pmullw to replace psllw and allow different bits. 
-#define READYUV422_10 \ - "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "punpcklwd %%xmm1,%%xmm0 \n" \ - "psraw $0x2,%%xmm0 \n" \ - "packuswb %%xmm0,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "movdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "psllw $0x6,%%xmm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READYUV210 \ + "movq (%[u_buf]),%%xmm0 \n" \ + "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "punpcklwd %%xmm1,%%xmm0 \n" \ + "psraw $0x2,%%xmm0 \n" \ + "packuswb %%xmm0,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movdqu (%[y_buf]),%%xmm4 \n" \ + "psllw $0x6,%%xmm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. -#define READYUVA422 \ - "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ - "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \ - "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n" +#define READYUVA422 \ + "movd (%[u_buf]),%%xmm0 \n" \ + "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x4(%[u_buf]),%[u_buf] \n" \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" \ + "movq (%[a_buf]),%%xmm5 \n" \ + "lea 0x8(%[a_buf]),%[a_buf] \n" // Read 4 UV from NV12, upsample to 8 UV -#define READNV12 \ - "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READNV12 \ + "movq (%[uv_buf]),%%xmm0 \n" \ + "lea 0x8(%[uv_buf]),%[uv_buf] \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 VU from NV21, upsample to 8 UV -#define READNV21 \ - "movq " MEMACCESS([vu_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x8, [vu_buf]) ",%[vu_buf] \n" \ - "pshufb %[kShuffleNV21], %%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READNV21 \ + "movq (%[vu_buf]),%%xmm0 \n" \ + "lea 0x8(%[vu_buf]),%[vu_buf] \n" \ + "pshufb %[kShuffleNV21], %%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 YUY2 with 8 Y and update 4 UV to 8 UV. -#define READYUY2 \ - "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \ - "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ - "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \ - "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ - "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n" +#define READYUY2 \ + "movdqu (%[yuy2_buf]),%%xmm4 \n" \ + "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ + "movdqu (%[yuy2_buf]),%%xmm0 \n" \ + "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ + "lea 0x10(%[yuy2_buf]),%[yuy2_buf] \n" // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. 
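// Two notes on the readers in this hunk. First, the READYUV210 macro above
// normalizes its 10-bit planes before the shared YUVTORGB stage; in scalar
// terms the two shifts amount to the following sketch (names illustrative,
// not from this source):
#include <stdint.h>
static void Read210Sample(uint16_t y10, uint16_t u10, uint8_t* u8,
                          uint16_t* y16) {
  *u8 = (uint8_t)(u10 >> 2);    // psraw $0x2 then packuswb: 10-bit UV -> 8-bit
  *y16 = (uint16_t)(y10 << 6);  // psllw $0x6: 10-bit Y -> 16-bit for pmulhuw
}
// Second, the packed-4:2:2 readers (READYUY2 above, READUYVY below) split
// one 16-byte load twice with pshufb, once through a Y mask and once
// through a UV mask, over the standard FourCC byte orders:
//   YUY2: Y0 U0 Y1 V0 ... luma in even bytes, chroma in odd bytes.
//   UYVY: U0 Y0 V0 Y1 ... luma in odd bytes, chroma in even bytes.
// Each load's 4 U/V pairs are reused across its 8 Y samples.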
-#define READUYVY \ - "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ - "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ - "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ - "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ - "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" +#define READUYVY \ + "movdqu (%[uyvy_buf]),%%xmm4 \n" \ + "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ + "movdqu (%[uyvy_buf]),%%xmm0 \n" \ + "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ + "lea 0x10(%[uyvy_buf]),%[uyvy_buf] \n" #if defined(__x86_64__) -#define YUVTORGB_SETUP(yuvconstants) \ - "movdqa " MEMACCESS([yuvconstants]) ",%%xmm8 \n" \ - "movdqa " MEMACCESS2(32, [yuvconstants]) ",%%xmm9 \n" \ - "movdqa " MEMACCESS2(64, [yuvconstants]) ",%%xmm10 \n" \ - "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm11 \n" \ - "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm12 \n" \ - "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm13 \n" \ - "movdqa " MEMACCESS2(192, [yuvconstants]) ",%%xmm14 \n" +#define YUVTORGB_SETUP(yuvconstants) \ + "movdqa (%[yuvconstants]),%%xmm8 \n" \ + "movdqa 32(%[yuvconstants]),%%xmm9 \n" \ + "movdqa 64(%[yuvconstants]),%%xmm10 \n" \ + "movdqa 96(%[yuvconstants]),%%xmm11 \n" \ + "movdqa 128(%[yuvconstants]),%%xmm12 \n" \ + "movdqa 160(%[yuvconstants]),%%xmm13 \n" \ + "movdqa 192(%[yuvconstants]),%%xmm14 \n" // Convert 8 pixels: 8 UV and 8 Y -#define YUVTORGB(yuvconstants) \ +#define YUVTORGB16(yuvconstants) \ "movdqa %%xmm0,%%xmm1 \n" \ "movdqa %%xmm0,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm3 \n" \ @@ -1712,72 +1776,95 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, "pmulhuw %%xmm14,%%xmm4 \n" \ "paddsw %%xmm4,%%xmm0 \n" \ "paddsw %%xmm4,%%xmm1 \n" \ - "paddsw %%xmm4,%%xmm2 \n" \ - "psraw $0x6,%%xmm0 \n" \ - "psraw $0x6,%%xmm1 \n" \ - "psraw $0x6,%%xmm2 \n" \ - "packuswb %%xmm0,%%xmm0 \n" \ - "packuswb %%xmm1,%%xmm1 \n" \ - "packuswb %%xmm2,%%xmm2 \n" + "paddsw %%xmm4,%%xmm2 \n" #define YUVTORGB_REGS \ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", #else #define YUVTORGB_SETUP(yuvconstants) // Convert 8 pixels: 8 UV and 8 Y -#define YUVTORGB(yuvconstants) \ - "movdqa %%xmm0,%%xmm1 \n" \ - "movdqa %%xmm0,%%xmm2 \n" \ - "movdqa %%xmm0,%%xmm3 \n" \ - "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ - "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ - "psubw %%xmm1,%%xmm0 \n" \ - "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ - "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ - "psubw %%xmm2,%%xmm1 \n" \ - "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ - "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ - "psubw %%xmm3,%%xmm2 \n" \ - "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ - "paddsw %%xmm4,%%xmm0 \n" \ - "paddsw %%xmm4,%%xmm1 \n" \ - "paddsw %%xmm4,%%xmm2 \n" \ - "psraw $0x6,%%xmm0 \n" \ - "psraw $0x6,%%xmm1 \n" \ - "psraw $0x6,%%xmm2 \n" \ - "packuswb %%xmm0,%%xmm0 \n" \ - "packuswb %%xmm1,%%xmm1 \n" \ - "packuswb %%xmm2,%%xmm2 \n" +#define YUVTORGB16(yuvconstants) \ + "movdqa %%xmm0,%%xmm1 \n" \ + "movdqa %%xmm0,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm3 \n" \ + "movdqa 96(%[yuvconstants]),%%xmm0 \n" \ + "pmaddubsw (%[yuvconstants]),%%xmm1 \n" \ + "psubw %%xmm1,%%xmm0 \n" \ + "movdqa 128(%[yuvconstants]),%%xmm1 \n" \ + "pmaddubsw 32(%[yuvconstants]),%%xmm2 \n" \ + "psubw %%xmm2,%%xmm1 \n" \ + "movdqa 160(%[yuvconstants]),%%xmm2 \n" \ + "pmaddubsw 64(%[yuvconstants]),%%xmm3 \n" \ + "psubw %%xmm3,%%xmm2 \n" \ + "pmulhuw 192(%[yuvconstants]),%%xmm4 \n" \ + "paddsw %%xmm4,%%xmm0 \n" \ + "paddsw %%xmm4,%%xmm1 \n" \ + "paddsw %%xmm4,%%xmm2 \n" #define YUVTORGB_REGS 
#endif +#define YUVTORGB(yuvconstants) \ + YUVTORGB16(yuvconstants) \ + "psraw $0x6,%%xmm0 \n" \ + "psraw $0x6,%%xmm1 \n" \ + "psraw $0x6,%%xmm2 \n" \ + "packuswb %%xmm0,%%xmm0 \n" \ + "packuswb %%xmm1,%%xmm1 \n" \ + "packuswb %%xmm2,%%xmm2 \n" + // Store 8 ARGB values. -#define STOREARGB \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklbw %%xmm5,%%xmm2 \n" \ - "movdqa %%xmm0,%%xmm1 \n" \ - "punpcklwd %%xmm2,%%xmm0 \n" \ - "punpckhwd %%xmm2,%%xmm1 \n" \ - "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \ - "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \ - "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n" +#define STOREARGB \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklbw %%xmm5,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm1 \n" \ + "punpcklwd %%xmm2,%%xmm0 \n" \ + "punpckhwd %%xmm2,%%xmm1 \n" \ + "movdqu %%xmm0,(%[dst_argb]) \n" \ + "movdqu %%xmm1,0x10(%[dst_argb]) \n" \ + "lea 0x20(%[dst_argb]), %[dst_argb] \n" // Store 8 RGBA values. -#define STORERGBA \ - "pcmpeqb %%xmm5,%%xmm5 \n" \ - "punpcklbw %%xmm2,%%xmm1 \n" \ - "punpcklbw %%xmm0,%%xmm5 \n" \ - "movdqa %%xmm5,%%xmm0 \n" \ - "punpcklwd %%xmm1,%%xmm5 \n" \ - "punpckhwd %%xmm1,%%xmm0 \n" \ - "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ - "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ - "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" - -void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +#define STORERGBA \ + "pcmpeqb %%xmm5,%%xmm5 \n" \ + "punpcklbw %%xmm2,%%xmm1 \n" \ + "punpcklbw %%xmm0,%%xmm5 \n" \ + "movdqa %%xmm5,%%xmm0 \n" \ + "punpcklwd %%xmm1,%%xmm5 \n" \ + "punpckhwd %%xmm1,%%xmm0 \n" \ + "movdqu %%xmm5,(%[dst_rgba]) \n" \ + "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \ + "lea 0x20(%[dst_rgba]),%[dst_rgba] \n" + +// Store 8 AR30 values. 
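// The STOREAR30 macro below keeps YUVTORGB16's 16-bit words at 6 fractional
// bits and shifts right by 4 instead of 6, yielding 10-bit channels that are
// clamped and packed 2:10:10:10. A scalar sketch of that packing (function
// name ours, not from this source):
#include <stdint.h>
static uint32_t PackAR30(int16_t b, int16_t g, int16_t r) {
  int B = b >> 4, G = g >> 4, R = r >> 4;  // psraw $0x4: 2 more bits than the
                                           // 8-bit >>6 path, i.e. 10-bit range
  B = B < 0 ? 0 : (B > 1023 ? 1023 : B);   // pmaxsw/pminsw clamp to [0,1023]
  G = G < 0 ? 0 : (G > 1023 ? 1023 : G);
  R = R < 0 ? 0 : (R > 1023 ? 1023 : R);
  return 0xC0000000u |                     // both alpha bits set: opaque
         ((uint32_t)R << 20) | ((uint32_t)G << 10) | (uint32_t)B;
}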
+#define STOREAR30 \ + "psraw $0x4,%%xmm0 \n" \ + "psraw $0x4,%%xmm1 \n" \ + "psraw $0x4,%%xmm2 \n" \ + "pminsw %%xmm7,%%xmm0 \n" \ + "pminsw %%xmm7,%%xmm1 \n" \ + "pminsw %%xmm7,%%xmm2 \n" \ + "pmaxsw %%xmm6,%%xmm0 \n" \ + "pmaxsw %%xmm6,%%xmm1 \n" \ + "pmaxsw %%xmm6,%%xmm2 \n" \ + "psllw $0x4,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm3 \n" \ + "punpcklwd %%xmm2,%%xmm0 \n" \ + "punpckhwd %%xmm2,%%xmm3 \n" \ + "movdqa %%xmm1,%%xmm2 \n" \ + "punpcklwd %%xmm5,%%xmm1 \n" \ + "punpckhwd %%xmm5,%%xmm2 \n" \ + "pslld $0xa,%%xmm1 \n" \ + "pslld $0xa,%%xmm2 \n" \ + "por %%xmm1,%%xmm0 \n" \ + "por %%xmm2,%%xmm3 \n" \ + "movdqu %%xmm0,(%[dst_ar30]) \n" \ + "movdqu %%xmm3,0x10(%[dst_ar30]) \n" \ + "lea 0x20(%[dst_ar30]), %[dst_ar30] \n" + +void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -1798,15 +1885,15 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } -void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb24, +void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -1827,9 +1914,9 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, "pshufb %%xmm5,%%xmm0 \n" "pshufb %%xmm6,%%xmm1 \n" "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" - "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" + "movq %%xmm0,(%[dst_rgb24]) \n" + "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" + "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" "subl $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] @@ -1844,15 +1931,15 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" ); } -void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -1873,16 +1960,50 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } +void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants + "psrlw $14,%%xmm5 \n" + "psllw $4,%%xmm5 \n" // 2 alpha bits + "pxor %%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min + "psrlw $6,%%xmm7 \n" // 1023 for 
max + + LABELALIGN + "1: \n" + READYUV422 + YUVTORGB16(yuvconstants) + STOREAR30 + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + ); +} + // 10 bit YUV to ARGB -void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf, - const uint16* u_buf, - const uint16* v_buf, - uint8* dst_argb, +void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -1892,7 +2013,7 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf, LABELALIGN "1: \n" - READYUV422_10 + READYUV210 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" @@ -1903,17 +2024,52 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } +// 10 bit YUV to AR30 +void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $14,%%xmm5 \n" + "psllw $4,%%xmm5 \n" // 2 alpha bits + "pxor %%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min + "psrlw $6,%%xmm7 \n" // 1023 for max + + LABELALIGN + "1: \n" + READYUV210 + YUVTORGB16(yuvconstants) + STOREAR30 + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + ); +} + #ifdef HAS_I422ALPHATOARGBROW_SSSE3 -void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -1939,16 +2095,16 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } #endif // HAS_I422ALPHATOARGBROW_SSSE3 -void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, +void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -1968,15 +2124,15 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", YUVTORGB_REGS // Does not use r14. 
+ : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } -void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, +void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -1997,14 +2153,14 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } -void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, - uint8* dst_argb, +void OMITFP YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -2025,14 +2181,14 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } -void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, - uint8* dst_argb, +void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -2053,16 +2209,16 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. 
+ : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } -void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, +void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -2083,7 +2239,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -2091,96 +2247,113 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, #endif // HAS_I422TOARGBROW_SSSE3 // Read 16 UV from 444 -#define READYUV444_AVX2 \ - "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READYUV444_AVX2 \ + "vmovdqu (%[u_buf]),%%xmm0 \n" \ + "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x10(%[u_buf]),%[u_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 UV from 422, upsample to 16 UV. -#define READYUV422_AVX2 \ - "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READYUV422_AVX2 \ + "vmovq (%[u_buf]),%%xmm0 \n" \ + "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" + +// Read 8 UV from 210 10 bit, upsample to 16 UV +// TODO(fbarchard): Consider vshufb to replace pack/unpack +// TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1. +#define READYUV210_AVX2 \ + "vmovdqu (%[u_buf]),%%xmm0 \n" \ + "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x10(%[u_buf]),%[u_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ + "vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpsraw $0x2,%%ymm0,%%ymm0 \n" \ + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%ymm4 \n" \ + "vpsllw $0x6,%%ymm4,%%ymm4 \n" \ + "lea 0x20(%[y_buf]),%[y_buf] \n" // Read 8 UV from 422, upsample to 16 UV. With 16 Alpha. 
-#define READYUVA422_AVX2 \ - "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \ - "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \ - "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ - "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" +#define READYUVA422_AVX2 \ + "vmovq (%[u_buf]),%%xmm0 \n" \ + "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" \ + "vmovdqu (%[a_buf]),%%xmm5 \n" \ + "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ + "lea 0x10(%[a_buf]),%[a_buf] \n" // Read 8 UV from NV12, upsample to 16 UV. -#define READNV12_AVX2 \ - "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READNV12_AVX2 \ + "vmovdqu (%[uv_buf]),%%xmm0 \n" \ + "lea 0x10(%[uv_buf]),%[uv_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 VU from NV21, upsample to 16 UV. -#define READNV21_AVX2 \ - "vmovdqu " MEMACCESS([vu_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x10, [vu_buf]) ",%[vu_buf] \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READNV21_AVX2 \ + "vmovdqu (%[vu_buf]),%%xmm0 \n" \ + "lea 0x10(%[vu_buf]),%[vu_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. -#define READYUY2_AVX2 \ - "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm4 \n" \ - "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \ - "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm0 \n" \ - "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \ - "lea " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf] \n" +#define READYUY2_AVX2 \ + "vmovdqu (%[yuy2_buf]),%%ymm4 \n" \ + "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \ + "vmovdqu (%[yuy2_buf]),%%ymm0 \n" \ + "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \ + "lea 0x20(%[yuy2_buf]),%[yuy2_buf] \n" // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. 
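// The YUY2 reader above and the UYVY reader below handle packed 4:2:2:
// two pixels share one U,V pair, stored as [Y0 U Y1 V] (YUY2) or
// [U Y0 V Y1] (UYVY). A scalar sketch of the split the vpshufb masks
// perform (helper name hypothetical); duplicating the shared chroma is
// the "upsample 8 UV to 16 UV" the comments refer to:
static inline void SplitYuy2Pair(const uint8_t* s,
                                 uint8_t y[2], uint8_t u[2], uint8_t v[2]) {
  y[0] = s[0];         // kShuffleYUY2Y picks bytes 0 and 2 (each is also
  y[1] = s[2];         // duplicated into a 16-bit lane for vpmulhuw)
  u[0] = u[1] = s[1];  // kShuffleYUY2UV replicates the shared U...
  v[0] = v[1] = s[3];  // ...and the shared V for both pixels
}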
-#define READUYVY_AVX2 \ - "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm4 \n" \ - "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \ - "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm0 \n" \ - "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \ - "lea " MEMLEA(0x20, [uyvy_buf]) ",%[uyvy_buf] \n" +#define READUYVY_AVX2 \ + "vmovdqu (%[uyvy_buf]),%%ymm4 \n" \ + "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \ + "vmovdqu (%[uyvy_buf]),%%ymm0 \n" \ + "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \ + "lea 0x20(%[uyvy_buf]),%[uyvy_buf] \n" #if defined(__x86_64__) -#define YUVTORGB_SETUP_AVX2(yuvconstants) \ - "vmovdqa " MEMACCESS([yuvconstants]) ",%%ymm8 \n" \ - "vmovdqa " MEMACCESS2(32, [yuvconstants]) ",%%ymm9 \n" \ - "vmovdqa " MEMACCESS2(64, [yuvconstants]) ",%%ymm10 \n" \ - "vmovdqa " MEMACCESS2(96, [yuvconstants]) ",%%ymm11 \n" \ - "vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" \ - "vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" \ - "vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n" - -#define YUVTORGB_AVX2(yuvconstants) \ +#define YUVTORGB_SETUP_AVX2(yuvconstants) \ + "vmovdqa (%[yuvconstants]),%%ymm8 \n" \ + "vmovdqa 32(%[yuvconstants]),%%ymm9 \n" \ + "vmovdqa 64(%[yuvconstants]),%%ymm10 \n" \ + "vmovdqa 96(%[yuvconstants]),%%ymm11 \n" \ + "vmovdqa 128(%[yuvconstants]),%%ymm12 \n" \ + "vmovdqa 160(%[yuvconstants]),%%ymm13 \n" \ + "vmovdqa 192(%[yuvconstants]),%%ymm14 \n" + +#define YUVTORGB16_AVX2(yuvconstants) \ "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \ "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \ "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \ @@ -2190,13 +2363,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ - "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \ - "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ - "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ - "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ - "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" + "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" #define YUVTORGB_REGS_AVX2 \ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", @@ -2204,48 +2371,78 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, #else // Convert 16 pixels: 16 UV and 16 Y. 
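// Both YUVTORGB16 variants keep each channel as a signed 16-bit value
// with 6 fractional bits. A scalar model of one lane (a sketch; the
// coefficient/bias/gain parameters stand in for the YuvConstants tables
// referenced at offsets 0/32/64, 96/128/160 and 192):
static inline int16_t YuvToChannel16(uint8_t u, uint8_t v, uint16_t y16,
                                     int8_t cu, int8_t cv, int16_t bias,
                                     uint16_t ygain) {
  int uv = u * cu + v * cv;                                  // vpmaddubsw
  int c = bias - uv + (int)(((uint32_t)y16 * ygain) >> 16);  // vpsubw,
  if (c < -32768) c = -32768;                    // vpmulhuw, then vpaddsw
  if (c > 32767) c = 32767;                      // (vpaddsw saturates)
  return (int16_t)c;
}
// The 8-bit stores then shift right by 6 and pack with saturation; the
// AR30 stores shift by only 4, keeping two extra bits for 10-bit output.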
#define YUVTORGB_SETUP_AVX2(yuvconstants) -#define YUVTORGB_AVX2(yuvconstants) \ - "vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \ - "vpmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%ymm0,%%ymm1 \n" \ - "vpmaddubsw " MEMACCESS([yuvconstants]) ",%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS2(160, [yuvconstants]) ",%%ymm3 \n" \ - "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \ - "vmovdqu " MEMACCESS2(128, [yuvconstants]) ",%%ymm3 \n" \ - "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \ - "vmovdqu " MEMACCESS2(96, [yuvconstants]) ",%%ymm3 \n" \ - "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \ - "vpmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%ymm4,%%ymm4 \n" \ - "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ - "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ - "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \ - "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ - "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ - "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ - "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" +#define YUVTORGB16_AVX2(yuvconstants) \ + "vpmaddubsw 64(%[yuvconstants]),%%ymm0,%%ymm2 \n" \ + "vpmaddubsw 32(%[yuvconstants]),%%ymm0,%%ymm1 \n" \ + "vpmaddubsw (%[yuvconstants]),%%ymm0,%%ymm0 \n" \ + "vmovdqu 160(%[yuvconstants]),%%ymm3 \n" \ + "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \ + "vmovdqu 128(%[yuvconstants]),%%ymm3 \n" \ + "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \ + "vmovdqu 96(%[yuvconstants]),%%ymm3 \n" \ + "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \ + "vpmulhuw 192(%[yuvconstants]),%%ymm4,%%ymm4 \n" \ + "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ + "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ + "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" #define YUVTORGB_REGS_AVX2 #endif +#define YUVTORGB_AVX2(yuvconstants) \ + YUVTORGB16_AVX2(yuvconstants) \ + "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ + "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ + "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ + "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" + // Store 16 ARGB values. -#define STOREARGB_AVX2 \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ - "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ - "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ - "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ - "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ - "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ - "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" +#define STOREARGB_AVX2 \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ + "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ + "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ + "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ + "vmovdqu %%ymm1,(%[dst_argb]) \n" \ + "vmovdqu %%ymm0,0x20(%[dst_argb]) \n" \ + "lea 0x40(%[dst_argb]), %[dst_argb] \n" + +// Store 16 AR30 values. 
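// Per pixel, the AR30 stores pack little-endian 2:10:10:10 with alpha
// forced opaque. A scalar sketch of STOREAR30_AVX2 below (illustration
// only; helper name hypothetical), taking the 16-bit channels with 6
// fractional bits produced by YUVTORGB16:
static inline uint32_t PackAR30(int16_t b6, int16_t g6, int16_t r6) {
  int b = b6 >> 4, g = g6 >> 4, r = r6 >> 4;  // vpsraw $4: 8.6 -> 10 bits
  b = b < 0 ? 0 : b > 1023 ? 1023 : b;        // vpmaxsw 0 / vpminsw 1023
  g = g < 0 ? 0 : g > 1023 ? 1023 : g;
  r = r < 0 ? 0 : r > 1023 ? 1023 : r;
  return (uint32_t)b | ((uint32_t)g << 10) | ((uint32_t)r << 20) |
         (3u << 30);  // 2 alpha bits = fully opaque
}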
+#define STOREAR30_AVX2 \ + "vpsraw $0x4,%%ymm0,%%ymm0 \n" \ + "vpsraw $0x4,%%ymm1,%%ymm1 \n" \ + "vpsraw $0x4,%%ymm2,%%ymm2 \n" \ + "vpminsw %%ymm7,%%ymm0,%%ymm0 \n" \ + "vpminsw %%ymm7,%%ymm1,%%ymm1 \n" \ + "vpminsw %%ymm7,%%ymm2,%%ymm2 \n" \ + "vpmaxsw %%ymm6,%%ymm0,%%ymm0 \n" \ + "vpmaxsw %%ymm6,%%ymm1,%%ymm1 \n" \ + "vpmaxsw %%ymm6,%%ymm2,%%ymm2 \n" \ + "vpsllw $0x4,%%ymm2,%%ymm2 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ + "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ + "vpunpckhwd %%ymm2,%%ymm0,%%ymm3 \n" \ + "vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \ + "vpunpckhwd %%ymm5,%%ymm1,%%ymm2 \n" \ + "vpunpcklwd %%ymm5,%%ymm1,%%ymm1 \n" \ + "vpslld $0xa,%%ymm1,%%ymm1 \n" \ + "vpslld $0xa,%%ymm2,%%ymm2 \n" \ + "vpor %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpor %%ymm2,%%ymm3,%%ymm3 \n" \ + "vmovdqu %%ymm0,(%[dst_ar30]) \n" \ + "vmovdqu %%ymm3,0x20(%[dst_ar30]) \n" \ + "lea 0x40(%[dst_ar30]), %[dst_ar30] \n" #ifdef HAS_I444TOARGBROW_AVX2 // 16 pixels // 16 UV values with 16 Y producing 16 ARGB (64 bytes). -void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -2267,7 +2464,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -2276,10 +2473,10 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -2302,20 +2499,135 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I422TOARGBROW_AVX2 +#if defined(HAS_I422TOAR30ROW_AVX2) +// 16 pixels +// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes). 
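// I422ToAR30Row_AVX2 below materializes its clamp constants from
// all-ones registers instead of loading them from memory:
//   ymm5 = (0xffff >> 14) << 4 = 0x0030 per lane: the 2-bit alpha value 3,
//          pre-shifted so the G/alpha word interleave followed by
//          vpslld $0xa lands it at bit 30;
//   ymm6 = 0x0000 per lane: lower clamp for vpmaxsw;
//   ymm7 = 0xffff >> 6 = 0x03ff (1023) per lane: upper clamp for vpminsw.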
+void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants + "vpsrlw $14,%%ymm5,%%ymm5 \n" + "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits + "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min + "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max + "vpsrlw $6,%%ymm7,%%ymm7 \n" + + LABELALIGN + "1: \n" + READYUV422_AVX2 + YUVTORGB16_AVX2(yuvconstants) + STOREAR30_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + ); +} +#endif // HAS_I422TOAR30ROW_AVX2 + +#if defined(HAS_I210TOARGBROW_AVX2) +// 16 pixels +// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). +void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + READYUV210_AVX2 + YUVTORGB_AVX2(yuvconstants) + STOREARGB_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} +#endif // HAS_I210TOARGBROW_AVX2 + +#if defined(HAS_I210TOAR30ROW_AVX2) +// 16 pixels +// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes). +void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants + "vpsrlw $14,%%ymm5,%%ymm5 \n" + "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits + "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min + "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max + "vpsrlw $6,%%ymm7,%%ymm7 \n" + + LABELALIGN + "1: \n" + READYUV210_AVX2 + YUVTORGB16_AVX2(yuvconstants) + STOREAR30_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} +#endif // HAS_I210TOAR30ROW_AVX2 + #if defined(HAS_I422ALPHATOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. 
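// A hedged usage sketch for the AR30 kernel defined above (not part of
// this patch; the real wrappers in libyuv's convert_argb.cc add C
// fallbacks for odd widths and runtime CPU detection). Strides are in
// bytes and width is assumed a multiple of 16:
void I422ToAR30Plane(const uint8_t* y, int y_stride,
                     const uint8_t* u, const uint8_t* v, int uv_stride,
                     uint8_t* dst_ar30, int dst_stride,
                     const struct YuvConstants* yuvconstants,
                     int width, int height) {
  for (int i = 0; i < height; ++i) {  // 4:2:2: chroma advances every row
    I422ToAR30Row_AVX2(y, u, v, dst_ar30, yuvconstants, width);
    y += y_stride;
    u += uv_stride;
    v += uv_stride;
    dst_ar30 += dst_stride;
  }
}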
-void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -2342,7 +2654,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2352,10 +2664,10 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, #if defined(HAS_I422TORGBAROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). -void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -2375,11 +2687,11 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, "vpermq $0xd8,%%ymm2,%%ymm2 \n" "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" "vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n" - "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n" - "sub $0x10,%[width] \n" - "jg 1b \n" + "vmovdqu %%ymm0,(%[dst_argb]) \n" + "vmovdqu %%ymm1,0x20(%[dst_argb]) \n" + "lea 0x40(%[dst_argb]),%[dst_argb] \n" + "sub $0x10,%[width] \n" + "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] @@ -2387,7 +2699,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -2396,9 +2708,9 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, #if defined(HAS_NV12TOARGBROW_AVX2) // 16 pixels. // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, +void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -2419,7 +2731,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2429,9 +2741,9 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, #if defined(HAS_NV21TOARGBROW_AVX2) // 16 pixels. // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
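// NV21 (handled below) interleaves chroma as VU rather than NV12's UV;
// kShuffleNV21 swaps each byte pair in-register so the shared YUVTORGB
// math always sees UV order. A scalar sketch of the swap plus the
// per-pixel duplication (layout inferred from the reader macros):
//   for (int i = 0; i < 8; ++i) {
//     uv[4 * i + 0] = uv[4 * i + 2] = vu_buf[2 * i + 1];  // U
//     uv[4 * i + 1] = uv[4 * i + 3] = vu_buf[2 * i + 0];  // V
//   }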
-void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, +void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -2453,7 +2765,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2463,8 +2775,8 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, #if defined(HAS_YUY2TOARGBROW_AVX2) // 16 pixels. // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). -void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, - uint8* dst_argb, +void OMITFP YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -2486,7 +2798,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2496,8 +2808,8 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, #if defined(HAS_UYVYTOARGBROW_AVX2) // 16 pixels. // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). -void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, - uint8* dst_argb, +void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off @@ -2519,7 +2831,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2527,95 +2839,93 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, #endif // HAS_UYVYTOARGBROW_AVX2 #ifdef HAS_I400TOARGBROW_SSE2 -void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { - asm volatile ( - "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 - "movd %%eax,%%xmm2 \n" - "pshufd $0x0,%%xmm2,%%xmm2 \n" - "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * 16 - "movd %%eax,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" +void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) { + asm volatile( + "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 + "movd %%eax,%%xmm2 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * + // 16 + "movd %%eax,%%xmm3 \n" + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "pslld $0x18,%%xmm4 \n" - LABELALIGN - "1: \n" - // Step 1: Scale Y contribution to 8 G values. 
G = (y - 16) * 1.164 - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "psubusw %%xmm3,%%xmm0 \n" - "psrlw $6, %%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - - // Step 2: Weave into ARGB - "punpcklbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "por %%xmm4,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(y_buf), // %0 - "+r"(dst_argb), // %1 - "+rm"(width) // %2 - : - : "memory", "cc", "eax" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); + LABELALIGN + "1: \n" + // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "psubusw %%xmm3,%%xmm0 \n" + "psrlw $6, %%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + + // Step 2: Weave into ARGB + "punpcklbw %%xmm0,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm0,%%xmm0 \n" + "punpckhwd %%xmm1,%%xmm1 \n" + "por %%xmm4,%%xmm0 \n" + "por %%xmm4,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(y_buf), // %0 + "+r"(dst_argb), // %1 + "+rm"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_I400TOARGBROW_SSE2 #ifdef HAS_I400TOARGBROW_AVX2 // 16 pixels of Y converted to 16 pixels of ARGB (64 bytes). // note: vpunpcklbw mutates and vpackuswb unmutates. -void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) { - asm volatile ( - "mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 * 16 - "vmovd %%eax,%%xmm2 \n" - "vbroadcastss %%xmm2,%%ymm2 \n" - "mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164 - "vmovd %%eax,%%xmm3 \n" - "vbroadcastss %%xmm3,%%ymm3 \n" - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpslld $0x18,%%ymm4,%%ymm4 \n" +void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) { + asm volatile( + "mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 * + // 16 + "vmovd %%eax,%%xmm2 \n" + "vbroadcastss %%xmm2,%%ymm2 \n" + "mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164 + "vmovd %%eax,%%xmm3 \n" + "vbroadcastss %%xmm3,%%ymm3 \n" + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpslld $0x18,%%ymm4,%%ymm4 \n" - LABELALIGN - "1: \n" - // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164 - "vmovdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n" - "vpsrlw $0x6,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n" - "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n" - "vpor %%ymm4,%%ymm0,%%ymm0 \n" - "vpor %%ymm4,%%ymm1,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(y_buf), // %0 - "+r"(dst_argb), // %1 - "+rm"(width) // %2 - : - : "memory", "cc", "eax" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); + LABELALIGN + "1: \n" + // Step 1: Scale Y contribution to 16 G values. 
G = (y - 16) * 1.164 + "vmovdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n" + "vpsrlw $0x6,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n" + "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n" + "vpor %%ymm4,%%ymm0,%%ymm0 \n" + "vpor %%ymm4,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(y_buf), // %0 + "+r"(dst_argb), // %1 + "+rm"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_I400TOARGBROW_AVX2 @@ -2624,52 +2934,50 @@ void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) { static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { +void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "movdqa %3,%%xmm5 \n" + asm volatile( - LABELALIGN - "1: \n" - MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0 - "pshufb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kShuffleMirror) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm5" - ); + "movdqa %3,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu -0x10(%0,%2,1),%%xmm0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kShuffleMirror) // %3 + : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_AVX2 -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { +void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "vbroadcastf128 %3,%%ymm5 \n" + asm volatile( - LABELALIGN - "1: \n" - MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0 - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpermq $0x4e,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kShuffleMirror) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm5" - ); + "vbroadcastf128 %3,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu -0x20(%0,%2,1),%%ymm0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpermq $0x4e,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kShuffleMirror) // %3 + : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_MIRRORROW_AVX2 @@ -2677,231 +2985,221 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { // Shuffle table for reversing the bytes of UV channels. 
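// Scalar equivalent (a sketch) of the mirror kernels above: each reads a
// 16- or 32-byte block from the tail of the row and byte-reverses it
// with the kShuffleMirror table, which amounts to
//   for (int i = 0; i < width; ++i) dst[i] = src[width - 1 - i];
// The kShuffleMirrorUV table below applies the same idea per channel of
// interleaved UV data, splitting it into reversed U and V rows.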
static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; -void MirrorUVRow_SSSE3(const uint8* src, - uint8* dst_u, - uint8* dst_v, +void MirrorUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "movdqa %4,%%xmm1 \n" - "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n" - "sub %1,%2 \n" + asm volatile( + "movdqa %4,%%xmm1 \n" + "lea -0x10(%0,%3,2),%0 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(-0x10,0) ",%0 \n" - "pshufb %%xmm1,%%xmm0 \n" - "movlpd %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $8,%3 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(temp_width) // %3 - : "m"(kShuffleMirrorUV) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea -0x10(%0),%0 \n" + "pshufb %%xmm1,%%xmm0 \n" + "movlpd %%xmm0,(%1) \n" + "movhpd %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $8,%3 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(temp_width) // %3 + : "m"(kShuffleMirrorUV) // %4 + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_MIRRORUVROW_SSSE3 #ifdef HAS_ARGBMIRRORROW_SSE2 -void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n" + asm volatile( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "pshufd $0x1b,%%xmm0,%%xmm0 \n" - "lea " MEMLEA(-0x10,0) ",%0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : - : "memory", "cc" - , "xmm0" - ); + "lea -0x10(%0,%2,4),%0 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "pshufd $0x1b,%%xmm0,%%xmm0 \n" + "lea -0x10(%0),%0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : + : "memory", "cc", "xmm0"); } #endif // HAS_ARGBMIRRORROW_SSE2 #ifdef HAS_ARGBMIRRORROW_AVX2 // Shuffle table for reversing the bytes. 
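// ARGBMirrorRow_SSE2 above reverses whole 4-byte pixels rather than
// bytes, i.e. dst_pixel[i] = src_pixel[width - 1 - i]: pshufd $0x1b
// flips the four 32-bit lanes of each load. The AVX2 version below uses
// vpermd with this dword table to flip eight lanes at a time.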
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "vmovdqu %3,%%ymm5 \n" + asm volatile( - LABELALIGN - "1: \n" - VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0 - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kARGBShuffleMirror_AVX2) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm5" - ); + "vmovdqu %3,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vpermd -0x20(%0,%2,4),%%ymm5,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kARGBShuffleMirror_AVX2) // %3 + : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_SPLITUVROW_AVX2 -void SplitUVRow_AVX2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_AVX2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm2 \n" - "vpsrlw $0x8,%%ymm1,%%ymm3 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - MEMOPMEM(vmovdqu,ymm2,0x00,1,2,1) // vmovdqu %%ymm2,(%1,%2) - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm2 \n" + "vpsrlw $0x8,%%ymm1,%%ymm3 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm2,0x00(%1,%2,1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SPLITUVROW_AVX2 #ifdef HAS_SPLITUVROW_SSE2 -void SplitUVRow_SSE2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_SSE2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 
\n" - "packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movdqu,xmm2,0x00,1,2,1) // movdqu %%xmm2,(%1,%2) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "psrlw $0x8,%%xmm2 \n" + "psrlw $0x8,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm2,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SPLITUVROW_SSE2 #ifdef HAS_MERGEUVROW_AVX2 -void MergeUVRow_AVX2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_AVX2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { - asm volatile ( - "sub %0,%1 \n" + asm volatile( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1 - "lea " MEMLEA(0x20,0) ",%0 \n" - "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" - "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n" - "vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n" - "vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n" - "vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2" - ); + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x00(%0,%1,1),%%ymm1 \n" + "lea 0x20(%0),%0 \n" + "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" + "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm2,(%2) \n" + "vextractf128 $0x0,%%ymm0,0x10(%2) \n" + "vextractf128 $0x1,%%ymm2,0x20(%2) \n" + "vextractf128 $0x1,%%ymm0,0x30(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_AVX2 #ifdef HAS_MERGEUVROW_SSE2 -void MergeUVRow_SSE2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_SSE2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { - asm volatile ( - "sub %0,%1 \n" + asm volatile( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm2 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n" - "lea " MEMLEA(0x20,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2" - ); + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "movdqa 
%%xmm0,%%xmm2 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm2 \n" + "movdqu %%xmm0,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_SSE2 @@ -2911,9 +3209,9 @@ void MergeUVRow_SSE2(const uint8* src_u, // 16 = 12 bits // 1 = 16 bits #ifdef HAS_MERGEUVROW_16_AVX2 -void MergeUVRow_16_AVX2(const uint16* src_u, - const uint16* src_v, - uint16* dst_uv, +void MergeUVRow_16_AVX2(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, int scale, int width) { // clang-format off @@ -2958,8 +3256,8 @@ void MergeUVRow_16_AVX2(const uint16* src_u, // 16 = 12 bits // 1 = 16 bits #ifdef HAS_MULTIPLYROW_16_AVX2 -void MultiplyRow_16_AVX2(const uint16* src_y, - uint16* dst_y, +void MultiplyRow_16_AVX2(const uint16_t* src_y, + uint16_t* dst_y, int scale, int width) { // clang-format off @@ -2996,8 +3294,8 @@ void MultiplyRow_16_AVX2(const uint16* src_y, // 16384 = 10 bits // 4096 = 12 bits // 256 = 16 bits -void Convert16To8Row_SSSE3(const uint16* src_y, - uint8* dst_y, +void Convert16To8Row_SSSE3(const uint16_t* src_y, + uint8_t* dst_y, int scale, int width) { // clang-format off @@ -3028,8 +3326,8 @@ void Convert16To8Row_SSSE3(const uint16* src_y, } #ifdef HAS_CONVERT16TO8ROW_AVX2 -void Convert16To8Row_AVX2(const uint16* src_y, - uint8* dst_y, +void Convert16To8Row_AVX2(const uint16_t* src_y, + uint8_t* dst_y, int scale, int width) { // clang-format off @@ -3067,8 +3365,8 @@ void Convert16To8Row_AVX2(const uint16* src_y, // 1024 = 10 bits // 4096 = 12 bits // TODO(fbarchard): reduce to SSE2 -void Convert8To16Row_SSE2(const uint8* src_y, - uint16* dst_y, +void Convert8To16Row_SSE2(const uint8_t* src_y, + uint16_t* dst_y, int scale, int width) { // clang-format off @@ -3101,8 +3399,8 @@ void Convert8To16Row_SSE2(const uint8* src_y, } #ifdef HAS_CONVERT8TO16ROW_AVX2 -void Convert8To16Row_AVX2(const uint8* src_y, - uint16* dst_y, +void Convert8To16Row_AVX2(const uint8_t* src_y, + uint16_t* dst_y, int scale, int width) { // clang-format off @@ -3169,66 +3467,65 @@ static const uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 0u, 3u, 6u, 9u, 12u, 15u}; -void SplitRGBRow_SSSE3(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_SSSE3(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "pshufb %5, %%xmm0 \n" - "pshufb %6, %%xmm1 \n" - "pshufb %7, %%xmm2 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "pshufb %8, %%xmm0 \n" - "pshufb %9, %%xmm1 \n" - "pshufb %10, %%xmm2 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "pshufb %11, %%xmm0 \n" - "pshufb %12, %%xmm1 \n" - "pshufb %13, %%xmm2 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(3) " \n" - "lea " MEMLEA(0x10,3) 
",%3 \n" - "lea " MEMLEA(0x30,0) ",%0 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_rgb), // %0 - "+r"(dst_r), // %1 - "+r"(dst_g), // %2 - "+r"(dst_b), // %3 - "+r"(width) // %4 - : "m"(kShuffleMaskRGBToR0), // %5 - "m"(kShuffleMaskRGBToR1), // %6 - "m"(kShuffleMaskRGBToR2), // %7 - "m"(kShuffleMaskRGBToG0), // %8 - "m"(kShuffleMaskRGBToG1), // %9 - "m"(kShuffleMaskRGBToG2), // %10 - "m"(kShuffleMaskRGBToB0), // %11 - "m"(kShuffleMaskRGBToB1), // %12 - "m"(kShuffleMaskRGBToB2) // %13 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "pshufb %5, %%xmm0 \n" + "pshufb %6, %%xmm1 \n" + "pshufb %7, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "pshufb %8, %%xmm0 \n" + "pshufb %9, %%xmm1 \n" + "pshufb %10, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "pshufb %11, %%xmm0 \n" + "pshufb %12, %%xmm1 \n" + "pshufb %13, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%3) \n" + "lea 0x10(%3),%3 \n" + "lea 0x30(%0),%0 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_rgb), // %0 + "+r"(dst_r), // %1 + "+r"(dst_g), // %2 + "+r"(dst_b), // %3 + "+r"(width) // %4 + : "m"(kShuffleMaskRGBToR0), // %5 + "m"(kShuffleMaskRGBToR1), // %6 + "m"(kShuffleMaskRGBToR2), // %7 + "m"(kShuffleMaskRGBToG0), // %8 + "m"(kShuffleMaskRGBToG1), // %9 + "m"(kShuffleMaskRGBToG2), // %10 + "m"(kShuffleMaskRGBToB0), // %11 + "m"(kShuffleMaskRGBToB1), // %12 + "m"(kShuffleMaskRGBToB2) // %13 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_SPLITRGBROW_SSSE3 @@ -3265,238 +3562,234 @@ static const uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u, 128u, 128u, 13u, 128u, 128u, 14u, 128u, 128u, 15u, 128u}; -void MergeRGBRow_SSSE3(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_SSSE3(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "pshufb %5, %%xmm0 \n" - "pshufb %6, %%xmm1 \n" - "pshufb %7, %%xmm2 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(3) " \n" - - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "pshufb %8, %%xmm0 \n" - "pshufb %9, %%xmm1 \n" - "pshufb %10, %%xmm2 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS2(16, 3) " \n" - - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "pshufb %11, %%xmm0 \n" - "pshufb %12, %%xmm1 \n" - "pshufb %13, %%xmm2 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS2(32, 3) " \n" - - "lea " MEMLEA(0x10,0) ",%0 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "lea " MEMLEA(0x30,3) ",%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_r), // %0 - "+r"(src_g), // %1 - "+r"(src_b), // %2 - "+r"(dst_rgb), // %3 - "+r"(width) // %4 - : "m"(kShuffleMaskRToRGB0), // %5 - 
"m"(kShuffleMaskGToRGB0), // %6 - "m"(kShuffleMaskBToRGB0), // %7 - "m"(kShuffleMaskRToRGB1), // %8 - "m"(kShuffleMaskGToRGB1), // %9 - "m"(kShuffleMaskBToRGB1), // %10 - "m"(kShuffleMaskRToRGB2), // %11 - "m"(kShuffleMaskGToRGB2), // %12 - "m"(kShuffleMaskBToRGB2) // %13 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu (%1),%%xmm1 \n" + "movdqu (%2),%%xmm2 \n" + "pshufb %5, %%xmm0 \n" + "pshufb %6, %%xmm1 \n" + "pshufb %7, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%3) \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu (%1),%%xmm1 \n" + "movdqu (%2),%%xmm2 \n" + "pshufb %8, %%xmm0 \n" + "pshufb %9, %%xmm1 \n" + "pshufb %10, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,16(%3) \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu (%1),%%xmm1 \n" + "movdqu (%2),%%xmm2 \n" + "pshufb %11, %%xmm0 \n" + "pshufb %12, %%xmm1 \n" + "pshufb %13, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,32(%3) \n" + + "lea 0x10(%0),%0 \n" + "lea 0x10(%1),%1 \n" + "lea 0x10(%2),%2 \n" + "lea 0x30(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_r), // %0 + "+r"(src_g), // %1 + "+r"(src_b), // %2 + "+r"(dst_rgb), // %3 + "+r"(width) // %4 + : "m"(kShuffleMaskRToRGB0), // %5 + "m"(kShuffleMaskGToRGB0), // %6 + "m"(kShuffleMaskBToRGB0), // %7 + "m"(kShuffleMaskRToRGB1), // %8 + "m"(kShuffleMaskGToRGB1), // %9 + "m"(kShuffleMaskBToRGB1), // %10 + "m"(kShuffleMaskRToRGB2), // %11 + "m"(kShuffleMaskGToRGB2), // %12 + "m"(kShuffleMaskBToRGB2) // %13 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGERGBROW_SSSE3 #ifdef HAS_COPYROW_SSE2 -void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { - asm volatile ( - "test $0xf,%0 \n" - "jne 2f \n" - "test $0xf,%1 \n" - "jne 2f \n" +void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "test $0xf,%0 \n" + "jne 2f \n" + "test $0xf,%1 \n" + "jne 2f \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "jmp 9f \n" + LABELALIGN + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,(%1) \n" + "movdqa %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "jmp 9f \n" - LABELALIGN - "2: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 2b \n" - "9: \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); + LABELALIGN + "2: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 2b \n" + + LABELALIGN "9: \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_AVX -void CopyRow_AVX(const uint8* src, uint8* dst, int count) { - asm volatile ( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 
\n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x40,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); +void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x40,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_COPYROW_AVX #ifdef HAS_COPYROW_ERMS // Multiple of 1. -void CopyRow_ERMS(const uint8* src, uint8* dst, int width) { +void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep movsb " MEMMOVESTRING(0, 1) " \n" - : "+S"(src), // %0 - "+D"(dst), // %1 - "+c"(width_tmp) // %2 - : - : "memory", "cc"); + asm volatile( + + "rep movsb \n" + : "+S"(src), // %0 + "+D"(dst), // %1 + "+c"(width_tmp) // %2 + : + : "memory", "cc"); } #endif // HAS_COPYROW_ERMS #ifdef HAS_ARGBCOPYALPHAROW_SSE2 // width in pixels -void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm0,%%xmm0 \n" - "pslld $0x18,%%xmm0 \n" - "pcmpeqb %%xmm1,%%xmm1 \n" - "psrld $0x8,%%xmm1 \n" +void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm0,%%xmm0 \n" + "pslld $0x18,%%xmm0 \n" + "pcmpeqb %%xmm1,%%xmm1 \n" + "psrld $0x8,%%xmm1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm4 \n" - "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n" - "pand %%xmm0,%%xmm2 \n" - "pand %%xmm0,%%xmm3 \n" - "pand %%xmm1,%%xmm4 \n" - "pand %%xmm1,%%xmm5 \n" - "por %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm2 \n" + "movdqu 0x10(%0),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "movdqu (%1),%%xmm4 \n" + "movdqu 0x10(%1),%%xmm5 \n" + "pand %%xmm0,%%xmm2 \n" + "pand %%xmm0,%%xmm3 \n" + "pand %%xmm1,%%xmm4 \n" + "pand %%xmm1,%%xmm5 \n" + "por %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm2,(%1) \n" + "movdqu %%xmm3,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBCOPYALPHAROW_SSE2 #ifdef HAS_ARGBCOPYALPHAROW_AVX2 // width in pixels -void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" - "vpsrld $0x8,%%ymm0,%%ymm0 \n" +void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" + "vpsrld $0x8,%%ymm0,%%ymm0 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" - "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm1," 
MEMACCESS(1) " \n" - "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm1 \n" + "vmovdqu 0x20(%0),%%ymm2 \n" + "lea 0x40(%0),%0 \n" + "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n" + "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm1,(%1) \n" + "vmovdqu %%ymm2,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBCOPYALPHAROW_AVX2 #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 // width in pixels -void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ", %%xmm0 \n" - "movdqu " MEMACCESS2(0x10, 0) ", %%xmm1 \n" - "lea " MEMLEA(0x20, 0) ", %0 \n" - "psrld $0x18, %%xmm0 \n" - "psrld $0x18, %%xmm1 \n" - "packssdw %%xmm1, %%xmm0 \n" - "packuswb %%xmm0, %%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8, 1) ", %1 \n" - "sub $0x8, %2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_a), // %1 - "+rm"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); +void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0), %%xmm0 \n" + "movdqu 0x10(%0), %%xmm1 \n" + "lea 0x20(%0), %0 \n" + "psrld $0x18, %%xmm0 \n" + "psrld $0x18, %%xmm1 \n" + "packssdw %%xmm1, %%xmm0 \n" + "packuswb %%xmm0, %%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1), %1 \n" + "sub $0x8, %2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+rm"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 @@ -3505,569 +3798,549 @@ static const uvec8 kShuffleAlphaShort_AVX2 = { 3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u, 11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u}; -void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) { - asm volatile ( - "vmovdqa %3,%%ymm4 \n" - "vbroadcastf128 %4,%%ymm5 \n" +void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + asm volatile( + "vmovdqa %3,%%ymm4 \n" + "vbroadcastf128 %4,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ", %%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0 - "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40, 0) ", %%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60, 0) ", %%ymm3 \n" - "lea " MEMLEA(0x80, 0) ", %0 \n" - "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates - "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" - "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" - "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates - "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. - "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate. 
- "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20, %2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_a), // %1 - "+rm"(width) // %2 - : "m"(kPermdARGBToY_AVX), // %3 - "m"(kShuffleAlphaShort_AVX2) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0), %%ymm0 \n" + "vmovdqu 0x20(%0), %%ymm1 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0 + "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" + "vmovdqu 0x40(%0), %%ymm2 \n" + "vmovdqu 0x60(%0), %%ymm3 \n" + "lea 0x80(%0), %0 \n" + "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates + "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" + "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" + "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates + "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. + "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate. + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20, %2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+rm"(width) // %2 + : "m"(kPermdARGBToY_AVX), // %3 + "m"(kShuffleAlphaShort_AVX2) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBEXTRACTALPHAROW_AVX2 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels -void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm0,%%xmm0 \n" - "pslld $0x18,%%xmm0 \n" - "pcmpeqb %%xmm1,%%xmm1 \n" - "psrld $0x8,%%xmm1 \n" +void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm0,%%xmm0 \n" + "pslld $0x18,%%xmm0 \n" + "pcmpeqb %%xmm1,%%xmm1 \n" + "psrld $0x8,%%xmm1 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "punpckhwd %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm2,%%xmm2 \n" - "movdqu " MEMACCESS(1) ",%%xmm4 \n" - "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n" - "pand %%xmm0,%%xmm2 \n" - "pand %%xmm0,%%xmm3 \n" - "pand %%xmm1,%%xmm4 \n" - "pand %%xmm1,%%xmm5 \n" - "por %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movq (%0),%%xmm2 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm2,%%xmm2 \n" + "punpckhwd %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm2,%%xmm2 \n" + "movdqu (%1),%%xmm4 \n" + "movdqu 0x10(%1),%%xmm5 \n" + "pand %%xmm0,%%xmm2 \n" + "pand %%xmm0,%%xmm3 \n" + "pand %%xmm1,%%xmm4 \n" + "pand %%xmm1,%%xmm5 \n" + "por %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm2,(%1) \n" + "movdqu %%xmm3,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 // width in pixels -void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" - "vpsrld $0x8,%%ymm0,%%ymm0 \n" +void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" + "vpsrld $0x8,%%ymm0,%%ymm0 \n" - LABELALIGN - "1: \n" - "vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n" - "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2 \n" - "lea " 
MEMLEA(0x10,0) ",%0 \n" - "vpslld $0x18,%%ymm1,%%ymm1 \n" - "vpslld $0x18,%%ymm2,%%ymm2 \n" - "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" - "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm1," MEMACCESS(1) " \n" - "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2" - ); + LABELALIGN + "1: \n" + "vpmovzxbd (%0),%%ymm1 \n" + "vpmovzxbd 0x8(%0),%%ymm2 \n" + "lea 0x10(%0),%0 \n" + "vpslld $0x18,%%ymm1,%%ymm1 \n" + "vpslld $0x18,%%ymm2,%%ymm2 \n" + "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n" + "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm1,(%1) \n" + "vmovdqu %%ymm2,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 #ifdef HAS_SETROW_X86 -void SetRow_X86(uint8* dst, uint8 v8, int width) { +void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width >> 2); - const uint32 v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. - asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n" - : "+D"(dst), // %0 - "+c"(width_tmp) // %1 - : "a"(v32) // %2 - : "memory", "cc"); + const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. + asm volatile( + + "rep stosl \n" + : "+D"(dst), // %0 + "+c"(width_tmp) // %1 + : "a"(v32) // %2 + : "memory", "cc"); } -void SetRow_ERMS(uint8* dst, uint8 v8, int width) { +void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep stosb " MEMSTORESTRING(al, 0) " \n" - : "+D"(dst), // %0 - "+c"(width_tmp) // %1 - : "a"(v8) // %2 - : "memory", "cc"); + asm volatile( + + "rep stosb \n" + : "+D"(dst), // %0 + "+c"(width_tmp) // %1 + : "a"(v8) // %2 + : "memory", "cc"); } -void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) { +void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n" - : "+D"(dst_argb), // %0 - "+c"(width_tmp) // %1 - : "a"(v32) // %2 - : "memory", "cc"); + asm volatile( + + "rep stosl \n" + : "+D"(dst_argb), // %0 + "+c"(width_tmp) // %1 + : "a"(v32) // %2 + : "memory", "cc"); } #endif // HAS_SETROW_X86 #ifdef HAS_YUY2TOYROW_SSE2 -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" +void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_yuy2), // %0 + 
"+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, +void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_yuy2)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%4,1),%%xmm2 \n" + "movdqu 0x10(%0,%4,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_yuy2)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b 
\n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); +void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } -void UYVYToUVRow_SSE2(const uint8* src_uyvy, +void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_uyvy)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%4,1),%%xmm2 \n" + "movdqu 0x10(%0,%4,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_uyvy)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " 
MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_YUY2TOYROW_SSE2 #ifdef HAS_YUY2TOYROW_AVX2 -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" +void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, +void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq 
$0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_yuy2)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_yuy2)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - 
"vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + asm volatile( + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void UYVYToUVRow_AVX2(const uint8* src_uyvy, +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_uyvy)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_uyvy)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + 
"sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_YUY2TOYROW_AVX2 @@ -4077,85 +4350,84 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; // Blend 8 pixels at a time -void ARGBBlendRow_SSSE3(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm7,%%xmm7 \n" - "psrlw $0xf,%%xmm7 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x8,%%xmm6 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "psllw $0x8,%%xmm5 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - "sub $0x4,%3 \n" - "jl 49f \n" - - // 4 pixel loop. - LABELALIGN - "40: \n" - "movdqu " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 40b \n" - - "49: \n" - "add $0x3,%3 \n" - "jl 99f \n" - - // 1 pixel loop. 
- "91: \n" - "movd " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movd " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movd " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "sub $0x1,%3 \n" - "jge 91b \n" - "99: \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : "m"(kShuffleAlpha) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + asm volatile( + "pcmpeqb %%xmm7,%%xmm7 \n" + "psrlw $0xf,%%xmm7 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrlw $0x8,%%xmm6 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "psllw $0x8,%%xmm5 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "pslld $0x18,%%xmm4 \n" + "sub $0x4,%3 \n" + "jl 49f \n" + + // 4 pixel loop. + LABELALIGN + "40: \n" + "movdqu (%0),%%xmm3 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm3,%%xmm0 \n" + "pxor %%xmm4,%%xmm3 \n" + "movdqu (%1),%%xmm2 \n" + "pshufb %4,%%xmm3 \n" + "pand %%xmm6,%%xmm2 \n" + "paddw %%xmm7,%%xmm3 \n" + "pmullw %%xmm3,%%xmm2 \n" + "movdqu (%1),%%xmm1 \n" + "lea 0x10(%1),%1 \n" + "psrlw $0x8,%%xmm1 \n" + "por %%xmm4,%%xmm0 \n" + "pmullw %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm2 \n" + "paddusb %%xmm2,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jge 40b \n" + + "49: \n" + "add $0x3,%3 \n" + "jl 99f \n" + + // 1 pixel loop. 
+ "91: \n" + "movd (%0),%%xmm3 \n" + "lea 0x4(%0),%0 \n" + "movdqa %%xmm3,%%xmm0 \n" + "pxor %%xmm4,%%xmm3 \n" + "movd (%1),%%xmm2 \n" + "pshufb %4,%%xmm3 \n" + "pand %%xmm6,%%xmm2 \n" + "paddw %%xmm7,%%xmm3 \n" + "pmullw %%xmm3,%%xmm2 \n" + "movd (%1),%%xmm1 \n" + "lea 0x4(%1),%1 \n" + "psrlw $0x8,%%xmm1 \n" + "por %%xmm4,%%xmm0 \n" + "pmullw %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm2 \n" + "paddusb %%xmm2,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "lea 0x4(%2),%2 \n" + "sub $0x1,%3 \n" + "jge 91b \n" + "99: \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : "m"(kShuffleAlpha) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBBLENDROW_SSSE3 @@ -4165,10 +4437,10 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -void BlendPlaneRow_SSSE3(const uint8* src0, - const uint8* src1, - const uint8* alpha, - uint8* dst, +void BlendPlaneRow_SSSE3(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -4217,10 +4489,10 @@ void BlendPlaneRow_SSSE3(const uint8* src0, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -void BlendPlaneRow_AVX2(const uint8* src0, - const uint8* src1, - const uint8* alpha, - uint8* dst, +void BlendPlaneRow_AVX2(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -4279,45 +4551,45 @@ static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u}; // Attenuate 4 pixels at a time. -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm3,%%xmm3 \n" - "pslld $0x18,%%xmm3 \n" - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" +void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + asm volatile( + "pcmpeqb %%xmm3,%%xmm3 \n" + "pslld $0x18,%%xmm3 \n" + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "punpcklbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm1,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "punpckhbw %%xmm2,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "pand %%xmm3,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha0), // %3 - "m"(kShuffleAlpha1) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + // 4 pixel loop. 
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "pshufb %%xmm4,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "punpcklbw %%xmm1,%%xmm1 \n" + "pmulhuw %%xmm1,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "pshufb %%xmm5,%%xmm1 \n" + "movdqu (%0),%%xmm2 \n" + "punpckhbw %%xmm2,%%xmm2 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "movdqu (%0),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "pand %%xmm3,%%xmm2 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleAlpha0), // %3 + "m"(kShuffleAlpha1) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBATTENUATEROW_SSSE3 @@ -4327,87 +4599,85 @@ static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u}; // Attenuate 8 pixels at a time. -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "vbroadcastf128 %3,%%ymm4 \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpslld $0x18,%%ymm5,%%ymm5 \n" - "sub %0,%1 \n" +void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm4 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpslld $0x18,%%ymm5,%%ymm5 \n" + "sub %0,%1 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm6 \n" - "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" - "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" - "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" - "vpshufb %%ymm4,%%ymm1,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpand %%ymm5,%%ymm6,%%ymm6 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpor %%ymm6,%%ymm0,%%ymm0 \n" - MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1) - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha_AVX2) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm6 \n" + "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" + "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" + "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" + "vpshufb %%ymm4,%%ymm1,%%ymm3 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" + "vpand %%ymm5,%%ymm6,%%ymm6 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpor %%ymm6,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%0,%1,1) \n" + "lea 0x20(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleAlpha_AVX2) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBATTENUATEROW_AVX2 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, - uint8* dst_argb, +void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { uintptr_t alpha; - asm volatile ( - // 4 pixel loop. 
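// Scalar sketch of unattenuation (illustrative, not from the patch):
// each color channel is divided by alpha to undo a premultiply. The
// loops avoid a per-pixel divide by looking up a fixed-point reciprocal
// of alpha in fixed_invtbl8 and multiplying with pmulhuw; the table's
// rounding differs slightly from this plain reference:
#include <stdint.h>
static inline uint8_t Unattenuate(uint8_t c, uint8_t a) {
  if (a == 0) {
    return 0;  // fully transparent: a choice made for this sketch
  }
  uint32_t v = ((uint32_t)c * 255u) / a;  // undo c' = c * a / 255
  return v > 255u ? 255u : (uint8_t)v;    // packuswb clamps overflow
}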
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movzb " MEMACCESS2(0x03,0) ",%3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x07,0) ",%3 \n" - MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "movlhps %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "movzb " MEMACCESS2(0x0b,0) ",%3 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x0f,0) ",%3 \n" - MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "movlhps %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width), // %2 - "=&r"(alpha) // %3 - : "r"(fixed_invtbl8) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + asm volatile( + // 4 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movzb 0x03(%0),%3 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "movd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x07(%0),%3 \n" + "movd 0x00(%4,%3,4),%%xmm3 \n" + "pshuflw $0x40,%%xmm2,%%xmm2 \n" + "pshuflw $0x40,%%xmm3,%%xmm3 \n" + "movlhps %%xmm3,%%xmm2 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "movzb 0x0b(%0),%3 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "movd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x0f(%0),%3 \n" + "movd 0x00(%4,%3,4),%%xmm3 \n" + "pshuflw $0x40,%%xmm2,%%xmm2 \n" + "pshuflw $0x40,%%xmm3,%%xmm3 \n" + "movlhps %%xmm3,%%xmm2 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width), // %2 + "=&r"(alpha) // %3 + : "r"(fixed_invtbl8) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBUNATTENUATEROW_SSE2 @@ -4416,114 +4686,111 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, static const uvec8 kUnattenShuffleAlpha_AVX2 = { 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u}; // Unattenuate 8 pixels at a time. -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, - uint8* dst_argb, +void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { uintptr_t alpha; - asm volatile ( - "sub %0,%1 \n" - "vbroadcastf128 %5,%%ymm5 \n" + asm volatile( + "sub %0,%1 \n" + "vbroadcastf128 %5,%%ymm5 \n" - // 8 pixel loop. 
- LABELALIGN - "1: \n" - // replace VPGATHER - "movzb " MEMACCESS2(0x03,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0 - "movzb " MEMACCESS2(0x07,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1 - "movzb " MEMACCESS2(0x0b,0) ",%3 \n" - "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x0f,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3 - "movzb " MEMACCESS2(0x13,0) ",%3 \n" - "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0 - "movzb " MEMACCESS2(0x17,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1 - "movzb " MEMACCESS2(0x1b,0) ",%3 \n" - "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x1f,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3 - "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n" - "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n" - "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n" - "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n" - // end of VPGATHER - - "vmovdqu " MEMACCESS(0) ",%%ymm6 \n" - "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" - "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" - "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n" - "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n" - "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" - "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1) - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width), // %2 - "=&r"(alpha) // %3 - : "r"(fixed_invtbl8), // %4 - "m"(kUnattenShuffleAlpha_AVX2) // %5 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 8 pixel loop. 
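// The "replace VPGATHER" block below stands in for a vpgatherdd: eight
// reciprocals are fetched with scalar movzb/vmovd loads and knitted back
// together with vpunpckldq/vpunpcklqdq/vinserti128. The same gather as a
// C sketch (names illustrative, not from the patch):
#include <stdint.h>
static inline void GatherInvByAlpha(const uint8_t* src_argb,
                                    const uint32_t* tbl,  // fixed_invtbl8
                                    uint32_t inv[8]) {
  for (int i = 0; i < 8; ++i) {
    inv[i] = tbl[src_argb[i * 4 + 3]];  // one table lookup per alpha byte
  }
}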
+ LABELALIGN + "1: \n" + // replace VPGATHER + "movzb 0x03(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm0 \n" + "movzb 0x07(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm1 \n" + "movzb 0x0b(%0),%3 \n" + "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n" + "vmovd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x0f(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm3 \n" + "movzb 0x13(%0),%3 \n" + "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n" + "vmovd 0x00(%4,%3,4),%%xmm0 \n" + "movzb 0x17(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm1 \n" + "movzb 0x1b(%0),%3 \n" + "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n" + "vmovd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x1f(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm3 \n" + "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n" + "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n" + "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n" + "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n" + // end of VPGATHER + + "vmovdqu (%0),%%ymm6 \n" + "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" + "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" + "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n" + "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n" + "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" + "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%0,%1,1) \n" + "lea 0x20(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width), // %2 + "=&r"(alpha) // %3 + : "r"(fixed_invtbl8), // %4 + "m"(kUnattenShuffleAlpha_AVX2) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBUNATTENUATEROW_AVX2 #ifdef HAS_ARGBGRAYROW_SSSE3 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" +void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) { + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "phaddw %%xmm1,%%xmm0 \n" - "paddw %%xmm5,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrld $0x18,%%xmm2 \n" - "psrld $0x18,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "packuswb %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm3 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm3,%%xmm0 \n" - "punpckhwd %%xmm3,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + // 8 pixel loop. 
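// Scalar sketch of the gray loop below: full-range (JPEG) luma is formed
// from the kARGBToYJ weights with the kAddYJ64 bias and the psrlw $0x7,
// then replicated into B, G and R while alpha passes through. The weight
// values here are the usual 7-bit fixed-point triple and are recalled,
// not quoted from this patch, so treat them as an assumption:
#include <stdint.h>
static inline uint8_t GrayY(uint8_t b, uint8_t g, uint8_t r) {
  return (uint8_t)((15 * b + 75 * g + 38 * r + 64) >> 7);  // ~BT.601 luma
}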
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "phaddw %%xmm1,%%xmm0 \n" + "paddw %%xmm5,%%xmm0 \n" + "psrlw $0x7,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movdqu (%0),%%xmm2 \n" + "movdqu 0x10(%0),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "psrld $0x18,%%xmm2 \n" + "psrld $0x18,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "packuswb %%xmm2,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpcklbw %%xmm2,%%xmm3 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm3,%%xmm0 \n" + "punpckhwd %%xmm3,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kARGBToYJ), // %3 + "m"(kAddYJ64) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBGRAYROW_SSSE3 @@ -4542,418 +4809,405 @@ static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0}; // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { - asm volatile ( - "movdqa %2,%%xmm2 \n" - "movdqa %3,%%xmm3 \n" - "movdqa %4,%%xmm4 \n" +void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) { + asm volatile( + "movdqa %2,%%xmm2 \n" + "movdqa %3,%%xmm3 \n" + "movdqa %4,%%xmm4 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "pmaddubsw %%xmm2,%%xmm6 \n" - "phaddw %%xmm6,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm5 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm5 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "phaddw %%xmm1,%%xmm5 \n" - "psrlw $0x7,%%xmm5 \n" - "packuswb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm5 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm5 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "phaddw %%xmm1,%%xmm5 \n" - "psrlw $0x7,%%xmm5 \n" - "packuswb %%xmm5,%%xmm5 \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "psrld $0x18,%%xmm6 \n" - "psrld $0x18,%%xmm1 \n" - "packuswb %%xmm1,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm5 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm5,%%xmm0 \n" - "punpckhwd %%xmm5,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(0) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x8,%1 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "m"(kARGBToSepiaB), // %2 - "m"(kARGBToSepiaG), // %3 - "m"(kARGBToSepiaR) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + // 8 pixel loop. 
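// Scalar sketch of the sepia loop below: each output channel is a
// pmaddubsw dot product of (B,G,R) with one weight triple, shifted down
// 7 bits and saturated, while alpha is re-extracted from the source.
// The R triple {24, 98, 50} is the one shown above; the B and G triples
// are used the same way:
#include <stdint.h>
static inline uint8_t SepiaChannel(int wb, int wg, int wr,
                                   uint8_t b, uint8_t g, uint8_t r) {
  int v = (wb * b + wg * g + wr * r) >> 7;  // 7-bit fixed point
  return v > 255 ? 255 : (uint8_t)v;        // packuswb saturates
}
// e.g. new_r = SepiaChannel(24, 98, 50, b, g, r);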
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm6 \n" + "pmaddubsw %%xmm2,%%xmm0 \n" + "pmaddubsw %%xmm2,%%xmm6 \n" + "phaddw %%xmm6,%%xmm0 \n" + "psrlw $0x7,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movdqu (%0),%%xmm5 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm5 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "phaddw %%xmm1,%%xmm5 \n" + "psrlw $0x7,%%xmm5 \n" + "packuswb %%xmm5,%%xmm5 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "movdqu (%0),%%xmm5 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm5 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "phaddw %%xmm1,%%xmm5 \n" + "psrlw $0x7,%%xmm5 \n" + "packuswb %%xmm5,%%xmm5 \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "psrld $0x18,%%xmm6 \n" + "psrld $0x18,%%xmm1 \n" + "packuswb %%xmm1,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "punpcklbw %%xmm6,%%xmm5 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm5,%%xmm0 \n" + "punpckhwd %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%0) \n" + "movdqu %%xmm1,0x10(%0) \n" + "lea 0x20(%0),%0 \n" + "sub $0x8,%1 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "+r"(width) // %1 + : "m"(kARGBToSepiaB), // %2 + "m"(kARGBToSepiaG), // %3 + "m"(kARGBToSepiaR) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBSEPIAROW_SSSE3 #ifdef HAS_ARGBCOLORMATRIXROW_SSSE3 // Tranform 8 ARGB pixels (32 bytes) with color matrix. // Same as Sepia except matrix is provided. -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width) { - asm volatile ( - "movdqu " MEMACCESS(3) ",%%xmm5 \n" - "pshufd $0x00,%%xmm5,%%xmm2 \n" - "pshufd $0x55,%%xmm5,%%xmm3 \n" - "pshufd $0xaa,%%xmm5,%%xmm4 \n" - "pshufd $0xff,%%xmm5,%%xmm5 \n" + asm volatile( + "movdqu (%3),%%xmm5 \n" + "pshufd $0x00,%%xmm5,%%xmm2 \n" + "pshufd $0x55,%%xmm5,%%xmm3 \n" + "pshufd $0xaa,%%xmm5,%%xmm4 \n" + "pshufd $0xff,%%xmm5,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "pmaddubsw %%xmm2,%%xmm7 \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "phaddsw %%xmm7,%%xmm0 \n" - "phaddsw %%xmm1,%%xmm6 \n" - "psraw $0x6,%%xmm0 \n" - "psraw $0x6,%%xmm6 \n" - "packuswb %%xmm0,%%xmm0 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm7 \n" - "phaddsw %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm7 \n" - "phaddsw %%xmm7,%%xmm6 \n" - "psraw $0x6,%%xmm1 \n" - "psraw $0x6,%%xmm6 \n" - "packuswb %%xmm1,%%xmm1 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "punpcklwd %%xmm1,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm6 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(matrix_argb) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 8 pixel loop. 
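// Scalar sketch of the color-matrix loop below: matrix_argb holds four
// rows of four signed 2.6 fixed-point weights, one row per output
// channel; pmaddubsw/phaddsw form the dot product and psraw $0x6
// rescales. A plain reference (names illustrative, not from the patch):
#include <stdint.h>
static inline uint8_t MatrixChannel(const int8_t* row,  // 4 weights
                                    uint8_t b, uint8_t g, uint8_t r,
                                    uint8_t a) {
  int v = (row[0] * b + row[1] * g + row[2] * r + row[3] * a) >> 6;
  return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v);  // saturate to 0..255
}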
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm7 \n" + "pmaddubsw %%xmm2,%%xmm0 \n" + "pmaddubsw %%xmm2,%%xmm7 \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "phaddsw %%xmm7,%%xmm0 \n" + "phaddsw %%xmm1,%%xmm6 \n" + "psraw $0x6,%%xmm0 \n" + "psraw $0x6,%%xmm6 \n" + "packuswb %%xmm0,%%xmm0 \n" + "packuswb %%xmm6,%%xmm6 \n" + "punpcklbw %%xmm6,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "movdqu 0x10(%0),%%xmm7 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm7 \n" + "phaddsw %%xmm7,%%xmm1 \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x10(%0),%%xmm7 \n" + "pmaddubsw %%xmm5,%%xmm6 \n" + "pmaddubsw %%xmm5,%%xmm7 \n" + "phaddsw %%xmm7,%%xmm6 \n" + "psraw $0x6,%%xmm1 \n" + "psraw $0x6,%%xmm6 \n" + "packuswb %%xmm1,%%xmm1 \n" + "packuswb %%xmm6,%%xmm6 \n" + "punpcklbw %%xmm6,%%xmm1 \n" + "movdqa %%xmm0,%%xmm6 \n" + "punpcklwd %%xmm1,%%xmm0 \n" + "punpckhwd %%xmm1,%%xmm6 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm6,0x10(%1) \n" + "lea 0x20(%0),%0 \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(matrix_argb) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBCOLORMATRIXROW_SSSE3 #ifdef HAS_ARGBQUANTIZEROW_SSE2 // Quantize 4 ARGB pixels (16 bytes). -void ARGBQuantizeRow_SSE2(uint8* dst_argb, +void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) { - asm volatile ( - "movd %2,%%xmm2 \n" - "movd %3,%%xmm3 \n" - "movd %4,%%xmm4 \n" - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshufd $0x44,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "pshufd $0x44,%%xmm3,%%xmm3 \n" - "pshuflw $0x40,%%xmm4,%%xmm4 \n" - "pshufd $0x44,%%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "pslld $0x18,%%xmm6 \n" - - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "pmullw %%xmm3,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm7 \n" - "pmullw %%xmm3,%%xmm1 \n" - "pand %%xmm6,%%xmm7 \n" - "paddw %%xmm4,%%xmm0 \n" - "paddw %%xmm4,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm7,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(0) " \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x4,%1 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "r"(scale), // %2 - "r"(interval_size), // %3 - "r"(interval_offset) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + asm volatile( + "movd %2,%%xmm2 \n" + "movd %3,%%xmm3 \n" + "movd %4,%%xmm4 \n" + "pshuflw $0x40,%%xmm2,%%xmm2 \n" + "pshufd $0x44,%%xmm2,%%xmm2 \n" + "pshuflw $0x40,%%xmm3,%%xmm3 \n" + "pshufd $0x44,%%xmm3,%%xmm3 \n" + "pshuflw $0x40,%%xmm4,%%xmm4 \n" + "pshufd $0x44,%%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "pslld $0x18,%%xmm6 \n" + + // 4 pixel loop. 
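// Scalar sketch of the quantize loop below: each color channel is
// posterized into buckets while the 0xFF000000 mask kept in xmm6
// preserves alpha. The caller supplies scale as a 16.16 factor,
// commonly about 65536 / interval_size:
#include <stdint.h>
static inline uint8_t Quantize(uint8_t v, int scale, int interval_size,
                               int interval_offset) {
  return (uint8_t)(((v * scale) >> 16) * interval_size + interval_offset);
}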
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm1 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "pmullw %%xmm3,%%xmm0 \n" + "movdqu (%0),%%xmm7 \n" + "pmullw %%xmm3,%%xmm1 \n" + "pand %%xmm6,%%xmm7 \n" + "paddw %%xmm4,%%xmm0 \n" + "paddw %%xmm4,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "por %%xmm7,%%xmm0 \n" + "movdqu %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x4,%1 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "+r"(width) // %1 + : "r"(scale), // %2 + "r"(interval_size), // %3 + "r"(interval_offset) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBQUANTIZEROW_SSE2 #ifdef HAS_ARGBSHADEROW_SSE2 // Shade 4 pixels at a time by specified value. -void ARGBShadeRow_SSE2(const uint8* src_argb, - uint8* dst_argb, +void ARGBShadeRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value) { - asm volatile ( - "movd %3,%%xmm2 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "punpcklqdq %%xmm2,%%xmm2 \n" + uint32_t value) { + asm volatile( + "movd %3,%%xmm2 \n" + "punpcklbw %%xmm2,%%xmm2 \n" + "punpcklqdq %%xmm2,%%xmm2 \n" - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(value) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2" - ); + // 4 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(value) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBSHADEROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_SSE2 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time. -void ARGBMultiplyRow_SSE2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" + asm volatile( - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqu %%xmm0,%%xmm1 \n" - "movdqu %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "punpckhbw %%xmm5,%%xmm3 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "pmulhuw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + "pxor %%xmm5,%%xmm5 \n" + + // 4 pixel loop. 
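// Scalar sketch of the multiply loop below: two ARGB rows are modulated
// channel by channel. punpcklbw x,x widens one side to a*257, the other
// side is zero-extended, and pmulhuw keeps the top 16 bits, which
// approximates a*b/255 (255*255 comes out as 254, matching the SIMD):
#include <stdint.h>
static inline uint8_t Multiply8(uint8_t a, uint8_t b) {
  return (uint8_t)((a * 257u * b) >> 16);  // (a*257 * b) >> 16 ~ a*b/255
}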
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqu (%1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "movdqu %%xmm0,%%xmm1 \n" + "movdqu %%xmm2,%%xmm3 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpckhbw %%xmm5,%%xmm3 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm3,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_ARGBMULTIPLYROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_AVX2 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBMultiplyRow_AVX2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { - asm volatile ( - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + asm volatile( - // 4 pixel loop. - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "vmovdqu " MEMACCESS(1) ",%%ymm3 \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n" - "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n" - "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" - "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x20,2) ",%2 \n" - "sub $0x8,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + + // 4 pixel loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm1 \n" + "lea 0x20(%0),%0 \n" + "vmovdqu (%1),%%ymm3 \n" + "lea 0x20(%1),%1 \n" + "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n" + "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n" + "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" + "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x8,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc" #if defined(__AVX2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" + , + "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" #endif - ); + ); } #endif // HAS_ARGBMULTIPLYROW_AVX2 #ifdef HAS_ARGBADDROW_SSE2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. -void ARGBAddRow_SSE2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBAddRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { - asm volatile ( - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); + asm volatile( + // 4 pixel loop. 
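// Annotation (not part of the patch): paddusb is a bytewise saturating
// add, so no unpack/pack is needed; per channel, alpha included,
//   dst = min(src_argb0 + src_argb1, 255)
// The subtract variants further below use psubusb the same way, giving
//   dst = max(src_argb0 - src_argb1, 0)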
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqu (%1),%%xmm1 \n" + "lea 0x10(%1),%1 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_ARGBADDROW_SSE2 #ifdef HAS_ARGBADDROW_AVX2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. -void ARGBAddRow_AVX2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBAddRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { - asm volatile ( - // 4 pixel loop. - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "vpaddusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "vmovdqu %%ymm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x20,2) ",%2 \n" - "sub $0x8,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0" - ); + asm volatile( + // 4 pixel loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "lea 0x20(%0),%0 \n" + "vpaddusb (%1),%%ymm0,%%ymm0 \n" + "lea 0x20(%1),%1 \n" + "vmovdqu %%ymm0,(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x8,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0"); } #endif // HAS_ARGBADDROW_AVX2 #ifdef HAS_ARGBSUBTRACTROW_SSE2 // Subtract 2 rows of ARGB pixels, 4 pixels at a time. -void ARGBSubtractRow_SSE2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { - asm volatile ( - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "psubusb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); + asm volatile( + // 4 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqu (%1),%%xmm1 \n" + "lea 0x10(%1),%1 \n" + "psubusb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_ARGBSUBTRACTROW_SSE2 #ifdef HAS_ARGBSUBTRACTROW_AVX2 // Subtract 2 rows of ARGB pixels, 8 pixels at a time. -void ARGBSubtractRow_AVX2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { - asm volatile ( - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "vpsubusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "vmovdqu %%ymm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x20,2) ",%2 \n" - "sub $0x8,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0" - ); + asm volatile( + // 4 pixel loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "lea 0x20(%0),%0 \n" + "vpsubusb (%1),%%ymm0,%%ymm0 \n" + "lea 0x20(%1),%1 \n" + "vmovdqu %%ymm0,(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x8,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0"); } #endif // HAS_ARGBSUBTRACTROW_AVX2 @@ -4962,55 +5216,53 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, // -1 0 1 // -2 0 2 // -1 0 1 -void SobelXRow_SSE2(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width) { - asm volatile ( - "sub %0,%1 \n" - "sub %0,%2 \n" - "sub %0,%3 \n" - "pxor %%xmm5,%%xmm5 \n" + asm volatile( + "sub %0,%1 \n" + "sub %0,%2 \n" + "sub %0,%3 \n" + "pxor %%xmm5,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "movq " MEMACCESS2(0x2,0) ",%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "psubw %%xmm1,%%xmm0 \n" - MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1 - MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2 - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "psubw %%xmm2,%%xmm1 \n" - MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2 - MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3 - "punpcklbw %%xmm5,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "psubw %%xmm3,%%xmm2 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "psubw %%xmm0,%%xmm1 \n" - "pmaxsw %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1) - "lea " MEMLEA(0x8,0) ",%0 \n" - "sub $0x8,%4 \n" - "jg 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(src_y2), // %2 - "+r"(dst_sobelx), // %3 - "+r"(width) // %4 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + // 8 pixel loop. 
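// Annotation (not part of the patch): with the kernel shown above, the
// loop computes one output byte per column, taking abs() branch-free via
// pmaxsw(x, -x) and clamping through packuswb saturation. A scalar C
// sketch (hypothetical helper, needs <stdint.h>):
static void SobelXRow_C_sketch(const uint8_t* y0, const uint8_t* y1,
                               const uint8_t* y2, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = y0[i] - y0[i + 2];
    int b = y1[i] - y1[i + 2];
    int c = y2[i] - y2[i + 2];
    int sobel = a + b + b + c;  // middle row weighted by 2
    if (sobel < 0) sobel = -sobel;
    dst[i] = (uint8_t)(sobel > 255 ? 255 : sobel);
  }
}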
+ LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "movq 0x2(%0),%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "psubw %%xmm1,%%xmm0 \n" + "movq 0x00(%0,%1,1),%%xmm1 \n" + "movq 0x02(%0,%1,1),%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "psubw %%xmm2,%%xmm1 \n" + "movq 0x00(%0,%2,1),%%xmm2 \n" + "movq 0x02(%0,%2,1),%%xmm3 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm3 \n" + "psubw %%xmm3,%%xmm2 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "pxor %%xmm1,%%xmm1 \n" + "psubw %%xmm0,%%xmm1 \n" + "pmaxsw %%xmm1,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,0x00(%0,%3,1) \n" + "lea 0x8(%0),%0 \n" + "sub $0x8,%4 \n" + "jg 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(src_y2), // %2 + "+r"(dst_sobelx), // %3 + "+r"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELXROW_SSE2 @@ -5019,52 +5271,50 @@ void SobelXRow_SSE2(const uint8* src_y0, // -1 -2 -1 // 0 0 0 // 1 2 1 -void SobelYRow_SSE2(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width) { - asm volatile ( - "sub %0,%1 \n" - "sub %0,%2 \n" - "pxor %%xmm5,%%xmm5 \n" + asm volatile( + "sub %0,%1 \n" + "sub %0,%2 \n" + "pxor %%xmm5,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1 - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "psubw %%xmm1,%%xmm0 \n" - "movq " MEMACCESS2(0x1,0) ",%%xmm1 \n" - MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2 - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "psubw %%xmm2,%%xmm1 \n" - "movq " MEMACCESS2(0x2,0) ",%%xmm2 \n" - MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3 - "punpcklbw %%xmm5,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "psubw %%xmm3,%%xmm2 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "psubw %%xmm0,%%xmm1 \n" - "pmaxsw %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1) - "lea " MEMLEA(0x8,0) ",%0 \n" - "sub $0x8,%3 \n" - "jg 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(dst_sobely), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + // 8 pixel loop. 
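// Annotation (not part of the patch): same structure as SobelXRow above,
// transposed to match the kernel shown; per output byte
//   a = y0[i]     - y1[i]
//   b = y0[i + 1] - y1[i + 1]
//   c = y0[i + 2] - y1[i + 2]
//   dst[i] = clamp255(abs(a + 2 * b + c))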
+ LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "movq 0x00(%0,%1,1),%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "psubw %%xmm1,%%xmm0 \n" + "movq 0x1(%0),%%xmm1 \n" + "movq 0x01(%0,%1,1),%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "psubw %%xmm2,%%xmm1 \n" + "movq 0x2(%0),%%xmm2 \n" + "movq 0x02(%0,%1,1),%%xmm3 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm3 \n" + "psubw %%xmm3,%%xmm2 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "pxor %%xmm1,%%xmm1 \n" + "psubw %%xmm0,%%xmm1 \n" + "pmaxsw %%xmm1,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,0x00(%0,%2,1) \n" + "lea 0x8(%0),%0 \n" + "sub $0x8,%3 \n" + "jg 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(dst_sobely), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELYROW_SSE2 @@ -5074,83 +5324,79 @@ void SobelYRow_SSE2(const uint8* src_y0, // R = Sobel // G = Sobel // B = Sobel -void SobelRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" + asm volatile( + "sub %0,%1 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm2 \n" - "punpckhbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm1 \n" - "punpckhwd %%xmm2,%%xmm2 \n" - "por %%xmm5,%%xmm1 \n" - "por %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklwd %%xmm0,%%xmm3 \n" - "punpckhwd %%xmm0,%%xmm0 \n" - "por %%xmm5,%%xmm3 \n" - "por %%xmm5,%%xmm0 \n" - "movdqu %%xmm1," MEMACCESS(2) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n" - "movdqu %%xmm3," MEMACCESS2(0x20,2) " \n" - "movdqu %%xmm0," MEMACCESS2(0x30,2) " \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm2 \n" + "punpckhbw %%xmm0,%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "punpcklwd %%xmm2,%%xmm1 \n" + "punpckhwd %%xmm2,%%xmm2 \n" + "por %%xmm5,%%xmm1 \n" + "por %%xmm5,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "punpcklwd %%xmm0,%%xmm3 \n" + "punpckhwd %%xmm0,%%xmm0 \n" + "por %%xmm5,%%xmm3 \n" + "por %%xmm5,%%xmm0 \n" + "movdqu %%xmm1,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "movdqu %%xmm3,0x20(%2) \n" + "movdqu %%xmm0,0x30(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELROW_SSE2 #ifdef HAS_SOBELTOPLANEROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into a plane. 
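// Annotation (not part of the patch): with both gradient planes already
// computed, this function reduces to the saturating add
//   dst_y[i] = min(src_sobelx[i] + src_sobely[i], 255)
// done 16 bytes per iteration with paddusb.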
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" + asm volatile( + "sub %0,%1 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_y), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_y), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_SOBELTOPLANEROW_SSE2 @@ -5160,1054 +5406,1123 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, // R = Sobel X // G = Sobel // B = Sobel Y -void SobelXYRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" + asm volatile( + "sub %0,%1 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "paddusb %%xmm1,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "punpckhbw %%xmm5,%%xmm0 \n" - "movdqa %%xmm1,%%xmm4 \n" - "punpcklbw %%xmm2,%%xmm4 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqa %%xmm4,%%xmm6 \n" - "punpcklwd %%xmm3,%%xmm6 \n" - "punpckhwd %%xmm3,%%xmm4 \n" - "movdqa %%xmm1,%%xmm7 \n" - "punpcklwd %%xmm0,%%xmm7 \n" - "punpckhwd %%xmm0,%%xmm1 \n" - "movdqu %%xmm6," MEMACCESS(2) " \n" - "movdqu %%xmm4," MEMACCESS2(0x10,2) " \n" - "movdqu %%xmm7," MEMACCESS2(0x20,2) " \n" - "movdqu %%xmm1," MEMACCESS2(0x30,2) " \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 8 pixel loop. 
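// Annotation (not part of the patch): the punpck ladder below interleaves
// the two gradients and their saturating sum into ARGB byte order:
//   dst_argb[4 * i + 0] = sobely[i];                        // B
//   dst_argb[4 * i + 1] = min(sobelx[i] + sobely[i], 255);  // G
//   dst_argb[4 * i + 2] = sobelx[i];                        // R
//   dst_argb[4 * i + 3] = 255;                              // A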
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "paddusb %%xmm1,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "punpcklbw %%xmm5,%%xmm3 \n" + "punpckhbw %%xmm5,%%xmm0 \n" + "movdqa %%xmm1,%%xmm4 \n" + "punpcklbw %%xmm2,%%xmm4 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqa %%xmm4,%%xmm6 \n" + "punpcklwd %%xmm3,%%xmm6 \n" + "punpckhwd %%xmm3,%%xmm4 \n" + "movdqa %%xmm1,%%xmm7 \n" + "punpcklwd %%xmm0,%%xmm7 \n" + "punpckhwd %%xmm0,%%xmm1 \n" + "movdqu %%xmm6,(%2) \n" + "movdqu %%xmm4,0x10(%2) \n" + "movdqu %%xmm7,0x20(%2) \n" + "movdqu %%xmm1,0x30(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_SOBELXYROW_SSE2 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 // Creates a table of cumulative sums where each value is a sum of all values // above and to the left of the value, inclusive of the value. -void ComputeCumulativeSumRow_SSE2(const uint8* row, - int32* cumsum, - const int32* previous_cumsum, +void ComputeCumulativeSumRow_SSE2(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, int width) { - asm volatile ( - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "sub $0x4,%3 \n" - "jl 49f \n" - "test $0xf,%1 \n" - "jne 49f \n" - - // 4 pixel loop. - LABELALIGN - "40: \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm1,%%xmm2 \n" - "punpckhwd %%xmm1,%%xmm3 \n" - "punpckhbw %%xmm1,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "punpcklwd %%xmm1,%%xmm4 \n" - "punpckhwd %%xmm1,%%xmm5 \n" - "paddd %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,2) ",%%xmm3 \n" - "paddd %%xmm0,%%xmm3 \n" - "paddd %%xmm4,%%xmm0 \n" - "movdqu " MEMACCESS2(0x20,2) ",%%xmm4 \n" - "paddd %%xmm0,%%xmm4 \n" - "paddd %%xmm5,%%xmm0 \n" - "movdqu " MEMACCESS2(0x30,2) ",%%xmm5 \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "paddd %%xmm0,%%xmm5 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm4," MEMACCESS2(0x20,1) " \n" - "movdqu %%xmm5," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x4,%3 \n" - "jge 40b \n" - - "49: \n" - "add $0x3,%3 \n" - "jl 19f \n" - - // 1 pixel loop. - LABELALIGN - "10: \n" - "movd " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "paddd %%xmm0,%%xmm2 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x1,%3 \n" - "jge 10b \n" - - "19: \n" - : "+r"(row), // %0 - "+r"(cumsum), // %1 - "+r"(previous_cumsum), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + asm volatile( + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm1,%%xmm1 \n" + "sub $0x4,%3 \n" + "jl 49f \n" + "test $0xf,%1 \n" + "jne 49f \n" + + // 4 pixel loop. 
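// Annotation (not part of the patch): this builds one row of a summed-area
// table; xmm0 carries the running per-channel sum across the row and each
// output adds the row above. A scalar C sketch (hypothetical helper, needs
// <stdint.h>):
static void ComputeCumulativeSumRow_C_sketch(const uint8_t* row,
                                             int32_t* cumsum,
                                             const int32_t* previous_cumsum,
                                             int width) {
  int32_t sum[4] = {0, 0, 0, 0};
  int x, c;
  for (x = 0; x < width; ++x) {
    for (c = 0; c < 4; ++c) {
      sum[c] += row[x * 4 + c];
      cumsum[x * 4 + c] = sum[c] + previous_cumsum[x * 4 + c];
    }
  }
}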
+ LABELALIGN + "40: \n" + "movdqu (%0),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm2,%%xmm4 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm1,%%xmm2 \n" + "punpckhwd %%xmm1,%%xmm3 \n" + "punpckhbw %%xmm1,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "punpcklwd %%xmm1,%%xmm4 \n" + "punpckhwd %%xmm1,%%xmm5 \n" + "paddd %%xmm2,%%xmm0 \n" + "movdqu (%2),%%xmm2 \n" + "paddd %%xmm0,%%xmm2 \n" + "paddd %%xmm3,%%xmm0 \n" + "movdqu 0x10(%2),%%xmm3 \n" + "paddd %%xmm0,%%xmm3 \n" + "paddd %%xmm4,%%xmm0 \n" + "movdqu 0x20(%2),%%xmm4 \n" + "paddd %%xmm0,%%xmm4 \n" + "paddd %%xmm5,%%xmm0 \n" + "movdqu 0x30(%2),%%xmm5 \n" + "lea 0x40(%2),%2 \n" + "paddd %%xmm0,%%xmm5 \n" + "movdqu %%xmm2,(%1) \n" + "movdqu %%xmm3,0x10(%1) \n" + "movdqu %%xmm4,0x20(%1) \n" + "movdqu %%xmm5,0x30(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x4,%3 \n" + "jge 40b \n" + + "49: \n" + "add $0x3,%3 \n" + "jl 19f \n" + + // 1 pixel loop. + LABELALIGN + "10: \n" + "movd (%0),%%xmm2 \n" + "lea 0x4(%0),%0 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm0 \n" + "movdqu (%2),%%xmm2 \n" + "lea 0x10(%2),%2 \n" + "paddd %%xmm0,%%xmm2 \n" + "movdqu %%xmm2,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x1,%3 \n" + "jge 10b \n" + + "19: \n" + : "+r"(row), // %0 + "+r"(cumsum), // %1 + "+r"(previous_cumsum), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -void CumulativeSumToAverageRow_SSE2(const int32* topleft, - const int32* botleft, +void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, + const int32_t* botleft, int width, int area, - uint8* dst, + uint8_t* dst, int count) { - asm volatile ( - "movd %5,%%xmm5 \n" - "cvtdq2ps %%xmm5,%%xmm5 \n" - "rcpss %%xmm5,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - "sub $0x4,%3 \n" - "jl 49f \n" - "cmpl $0x80,%5 \n" - "ja 40f \n" - - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrld $0x10,%%xmm6 \n" - "cvtdq2ps %%xmm6,%%xmm6 \n" - "addps %%xmm6,%%xmm5 \n" - "mulps %%xmm4,%%xmm5 \n" - "cvtps2dq %%xmm5,%%xmm5 \n" - "packssdw %%xmm5,%%xmm5 \n" - - // 4 pixel small loop. 
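// Annotation (not part of the patch): a box filter over the summed-area
// table. Per 32-bit channel the box sum is
//   sum = topleft[x] - topleft[x + w] - botleft[x] + botleft[x + w]
// and each output byte is sum / area. For area <= 0x80 the division is a
// 16-bit fixed-point multiply (pmulhuw by roughly 65536 / area); larger
// areas take the float reciprocal path (rcpss/mulps) instead.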
- LABELALIGN - "4: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 - MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 - MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" - "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 - "lea " MEMLEA(0x40,1) ",%1 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packssdw %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm0 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 4b \n" - "jmp 49f \n" - - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 - MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 - MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" - "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 - "lea " MEMLEA(0x40,1) ",%1 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "cvtdq2ps %%xmm1,%%xmm1 \n" - "mulps %%xmm4,%%xmm0 \n" - "mulps %%xmm4,%%xmm1 \n" - "cvtdq2ps %%xmm2,%%xmm2 \n" - "cvtdq2ps %%xmm3,%%xmm3 \n" - "mulps %%xmm4,%%xmm2 \n" - "mulps %%xmm4,%%xmm3 \n" - "cvtps2dq %%xmm0,%%xmm0 \n" - "cvtps2dq %%xmm1,%%xmm1 \n" - "cvtps2dq %%xmm2,%%xmm2 \n" - "cvtps2dq %%xmm3,%%xmm3 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packssdw %%xmm3,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 40b \n" - - "49: \n" - "add $0x3,%3 \n" - "jl 19f \n" - - // 1 pixel loop \n" - LABELALIGN - "10: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - "lea " MEMLEA(0x10,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - "lea " MEMLEA(0x10,1) ",%1 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "mulps %%xmm4,%%xmm0 \n" - "cvtps2dq %%xmm0,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "sub $0x1,%3 \n" - "jge 10b \n" - "19: \n" - : "+r"(topleft), // %0 - "+r"(botleft), // %1 - "+r"(dst), // %2 - "+rm"(count) // %3 - : "r"((intptr_t)(width)), // %4 - "rm"(area) // %5 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", 
"xmm6" - ); + asm volatile( + "movd %5,%%xmm5 \n" + "cvtdq2ps %%xmm5,%%xmm5 \n" + "rcpss %%xmm5,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "sub $0x4,%3 \n" + "jl 49f \n" + "cmpl $0x80,%5 \n" + "ja 40f \n" + + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrld $0x10,%%xmm6 \n" + "cvtdq2ps %%xmm6,%%xmm6 \n" + "addps %%xmm6,%%xmm5 \n" + "mulps %%xmm4,%%xmm5 \n" + "cvtps2dq %%xmm5,%%xmm5 \n" + "packssdw %%xmm5,%%xmm5 \n" + + // 4 pixel small loop. + LABELALIGN + "4: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "psubd 0x00(%0,%4,4),%%xmm0 \n" + "psubd 0x10(%0,%4,4),%%xmm1 \n" + "psubd 0x20(%0,%4,4),%%xmm2 \n" + "psubd 0x30(%0,%4,4),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "psubd (%1),%%xmm0 \n" + "psubd 0x10(%1),%%xmm1 \n" + "psubd 0x20(%1),%%xmm2 \n" + "psubd 0x30(%1),%%xmm3 \n" + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "paddd 0x10(%1,%4,4),%%xmm1 \n" + "paddd 0x20(%1,%4,4),%%xmm2 \n" + "paddd 0x30(%1,%4,4),%%xmm3 \n" + "lea 0x40(%1),%1 \n" + "packssdw %%xmm1,%%xmm0 \n" + "packssdw %%xmm3,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm0 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jge 4b \n" + "jmp 49f \n" + + // 4 pixel loop + LABELALIGN + "40: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "psubd 0x00(%0,%4,4),%%xmm0 \n" + "psubd 0x10(%0,%4,4),%%xmm1 \n" + "psubd 0x20(%0,%4,4),%%xmm2 \n" + "psubd 0x30(%0,%4,4),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "psubd (%1),%%xmm0 \n" + "psubd 0x10(%1),%%xmm1 \n" + "psubd 0x20(%1),%%xmm2 \n" + "psubd 0x30(%1),%%xmm3 \n" + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "paddd 0x10(%1,%4,4),%%xmm1 \n" + "paddd 0x20(%1,%4,4),%%xmm2 \n" + "paddd 0x30(%1,%4,4),%%xmm3 \n" + "lea 0x40(%1),%1 \n" + "cvtdq2ps %%xmm0,%%xmm0 \n" + "cvtdq2ps %%xmm1,%%xmm1 \n" + "mulps %%xmm4,%%xmm0 \n" + "mulps %%xmm4,%%xmm1 \n" + "cvtdq2ps %%xmm2,%%xmm2 \n" + "cvtdq2ps %%xmm3,%%xmm3 \n" + "mulps %%xmm4,%%xmm2 \n" + "mulps %%xmm4,%%xmm3 \n" + "cvtps2dq %%xmm0,%%xmm0 \n" + "cvtps2dq %%xmm1,%%xmm1 \n" + "cvtps2dq %%xmm2,%%xmm2 \n" + "cvtps2dq %%xmm3,%%xmm3 \n" + "packssdw %%xmm1,%%xmm0 \n" + "packssdw %%xmm3,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jge 40b \n" + + "49: \n" + "add $0x3,%3 \n" + "jl 19f \n" + + // 1 pixel loop + LABELALIGN + "10: \n" + "movdqu (%0),%%xmm0 \n" + "psubd 0x00(%0,%4,4),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "psubd (%1),%%xmm0 \n" + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "cvtdq2ps %%xmm0,%%xmm0 \n" + "mulps %%xmm4,%%xmm0 \n" + "cvtps2dq %%xmm0,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "lea 0x4(%2),%2 \n" + "sub $0x1,%3 \n" + "jge 10b \n" + "19: \n" + : "+r"(topleft), // %0 + "+r"(botleft), // %1 + "+r"(dst), // %2 + "+rm"(count) // %3 + : "r"((intptr_t)(width)), // %4 + "rm"(area) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #ifdef HAS_ARGBAFFINEROW_SSE2 // Copy ARGB pixels from source image with slope to a row of destination. 
LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, +void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, - uint8* dst_argb, + uint8_t* dst_argb, const float* src_dudv, int width) { intptr_t src_argb_stride_temp = src_argb_stride; intptr_t temp; - asm volatile ( - "movq " MEMACCESS(3) ",%%xmm2 \n" - "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n" - "shl $0x10,%1 \n" - "add $0x4,%1 \n" - "movd %1,%%xmm5 \n" - "sub $0x4,%4 \n" - "jl 49f \n" - - "pshufd $0x44,%%xmm7,%%xmm7 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "movdqa %%xmm2,%%xmm0 \n" - "addps %%xmm7,%%xmm0 \n" - "movlhps %%xmm0,%%xmm2 \n" - "movdqa %%xmm7,%%xmm4 \n" - "addps %%xmm4,%%xmm4 \n" - "movdqa %%xmm2,%%xmm3 \n" - "addps %%xmm4,%%xmm3 \n" - "addps %%xmm4,%%xmm4 \n" - - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2 - "cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2 - "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts - "pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride - "movd %%xmm0,%k1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%k5 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 - "punpckldq %%xmm6,%%xmm1 \n" - "addps %%xmm4,%%xmm2 \n" - "movq %%xmm1," MEMACCESS(2) " \n" - "movd %%xmm0,%k1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%k5 \n" - MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 - "punpckldq %%xmm6,%%xmm0 \n" - "addps %%xmm4,%%xmm3 \n" - "movq %%xmm0," MEMACCESS2(0x08,2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%4 \n" - "jge 40b \n" - - "49: \n" - "add $0x3,%4 \n" - "jl 19f \n" - - // 1 pixel loop \n" - LABELALIGN - "10: \n" - "cvttps2dq %%xmm2,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "pmaddwd %%xmm5,%%xmm0 \n" - "addps %%xmm7,%%xmm2 \n" - "movd %%xmm0,%k1 \n" - MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x04,2) ",%2 \n" - "sub $0x1,%4 \n" - "jge 10b \n" - "19: \n" - : "+r"(src_argb), // %0 - "+r"(src_argb_stride_temp), // %1 - "+r"(dst_argb), // %2 - "+r"(src_dudv), // %3 - "+rm"(width), // %4 - "=&r"(temp) // %5 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + asm volatile( + "movq (%3),%%xmm2 \n" + "movq 0x08(%3),%%xmm7 \n" + "shl $0x10,%1 \n" + "add $0x4,%1 \n" + "movd %1,%%xmm5 \n" + "sub $0x4,%4 \n" + "jl 49f \n" + + "pshufd $0x44,%%xmm7,%%xmm7 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "movdqa %%xmm2,%%xmm0 \n" + "addps %%xmm7,%%xmm0 \n" + "movlhps %%xmm0,%%xmm2 \n" + "movdqa %%xmm7,%%xmm4 \n" + "addps %%xmm4,%%xmm4 \n" + "movdqa %%xmm2,%%xmm3 \n" + "addps %%xmm4,%%xmm3 \n" + "addps %%xmm4,%%xmm4 \n" + + // 4 pixel loop + LABELALIGN + "40: \n" + "cvttps2dq %%xmm2,%%xmm0 \n" // x,y float->int first 2 + "cvttps2dq %%xmm3,%%xmm1 \n" // x,y float->int next 2 + "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts + "pmaddwd %%xmm5,%%xmm0 \n" // off = x*4 + y*stride + "movd %%xmm0,%k1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd %%xmm0,%k5 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd 0x00(%0,%1,1),%%xmm1 \n" + "movd 0x00(%0,%5,1),%%xmm6 \n" + "punpckldq %%xmm6,%%xmm1 \n" + "addps %%xmm4,%%xmm2 \n" + "movq %%xmm1,(%2) \n" + "movd %%xmm0,%k1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd %%xmm0,%k5 \n" + "movd 0x00(%0,%1,1),%%xmm0 \n" + "movd 0x00(%0,%5,1),%%xmm6 \n" + "punpckldq %%xmm6,%%xmm0 \n" + "addps %%xmm4,%%xmm3 \n" + 
"movq %%xmm0,0x08(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%4 \n" + "jge 40b \n" + + "49: \n" + "add $0x3,%4 \n" + "jl 19f \n" + + // 1 pixel loop + LABELALIGN + "10: \n" + "cvttps2dq %%xmm2,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "pmaddwd %%xmm5,%%xmm0 \n" + "addps %%xmm7,%%xmm2 \n" + "movd %%xmm0,%k1 \n" + "movd 0x00(%0,%1,1),%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "lea 0x04(%2),%2 \n" + "sub $0x1,%4 \n" + "jge 10b \n" + "19: \n" + : "+r"(src_argb), // %0 + "+r"(src_argb_stride_temp), // %1 + "+r"(dst_argb), // %2 + "+r"(src_dudv), // %3 + "+rm"(width), // %4 + "=&r"(temp) // %5 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBAFFINEROW_SSE2 #ifdef HAS_INTERPOLATEROW_SSSE3 // Bilinear filter 16x2 -> 16x1 -void InterpolateRow_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { - asm volatile ( - "sub %1,%0 \n" - "cmp $0x0,%3 \n" - "je 100f \n" - "cmp $0x80,%3 \n" - "je 50f \n" - - "movd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x100,%3 \n" - "movd %3,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "punpcklwd %%xmm5,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x80808080,%%eax \n" - "movd %%eax,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - - // General purpose row blend. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm2) - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "psubb %%xmm4,%%xmm0 \n" - "psubb %%xmm4,%%xmm1 \n" - "movdqa %%xmm5,%%xmm2 \n" - "movdqa %%xmm5,%%xmm3 \n" - "pmaddubsw %%xmm0,%%xmm2 \n" - "pmaddubsw %%xmm1,%%xmm3 \n" - "paddw %%xmm4,%%xmm2 \n" - "paddw %%xmm4,%%xmm3 \n" - "psrlw $0x8,%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - MEMOPMEM(movdqu,xmm2,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "jmp 99f \n" + asm volatile( + "sub %1,%0 \n" + "cmp $0x0,%3 \n" + "je 100f \n" + "cmp $0x80,%3 \n" + "je 50f \n" + + "movd %3,%%xmm0 \n" + "neg %3 \n" + "add $0x100,%3 \n" + "movd %3,%%xmm5 \n" + "punpcklbw %%xmm0,%%xmm5 \n" + "punpcklwd %%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "mov $0x80808080,%%eax \n" + "movd %%eax,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + + // General purpose row blend. + LABELALIGN + "1: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu 0x00(%1,%4,1),%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "psubb %%xmm4,%%xmm0 \n" + "psubb %%xmm4,%%xmm1 \n" + "movdqa %%xmm5,%%xmm2 \n" + "movdqa %%xmm5,%%xmm3 \n" + "pmaddubsw %%xmm0,%%xmm2 \n" + "pmaddubsw %%xmm1,%%xmm3 \n" + "paddw %%xmm4,%%xmm2 \n" + "paddw %%xmm4,%%xmm3 \n" + "psrlw $0x8,%%xmm2 \n" + "psrlw $0x8,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "movdqu %%xmm2,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "jmp 99f \n" - // Blend 50 / 50. - LABELALIGN - "50: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm1) - "pavgb %%xmm1,%%xmm0 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 50b \n" - "jmp 99f \n" - - // Blend 100 / 0 - Copy row unchanged. 
- LABELALIGN - "100: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+rm"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + // Blend 50 / 50. + LABELALIGN + "50: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu 0x00(%1,%4,1),%%xmm1 \n" + "pavgb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 50b \n" + "jmp 99f \n" + + // Blend 100 / 0 - Copy row unchanged. + LABELALIGN + "100: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu %%xmm0,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 100b \n" + + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+rm"(dst_width), // %2 + "+r"(source_y_fraction) // %3 + : "r"((intptr_t)(src_stride)) // %4 + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_INTERPOLATEROW_SSSE3 #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 32x2 -> 32x1 -void InterpolateRow_AVX2(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { - asm volatile ( - "cmp $0x0,%3 \n" - "je 100f \n" - "sub %1,%0 \n" - "cmp $0x80,%3 \n" - "je 50f \n" - - "vmovd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x100,%3 \n" - "vmovd %3,%%xmm5 \n" - "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" - "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" - "vbroadcastss %%xmm5,%%ymm5 \n" - "mov $0x80808080,%%eax \n" - "vmovd %%eax,%%xmm4 \n" - "vbroadcastss %%xmm4,%%ymm4 \n" - - // General purpose row blend. - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" - MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) - "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" - "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" - "vpsubb %%ymm4,%%ymm1,%%ymm1 \n" - "vpsubb %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n" - "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n" - "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" - "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "jmp 99f \n" + asm volatile( + "cmp $0x0,%3 \n" + "je 100f \n" + "sub %1,%0 \n" + "cmp $0x80,%3 \n" + "je 50f \n" + + "vmovd %3,%%xmm0 \n" + "neg %3 \n" + "add $0x100,%3 \n" + "vmovd %3,%%xmm5 \n" + "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" + "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" + "vbroadcastss %%xmm5,%%ymm5 \n" + "mov $0x80808080,%%eax \n" + "vmovd %%eax,%%xmm4 \n" + "vbroadcastss %%xmm4,%%ymm4 \n" - // Blend 50 / 50. - LABELALIGN - "50: \n" - "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" - VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0 - MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 50b \n" - "jmp 99f \n" + // General purpose row blend. 
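// Annotation (not part of the patch): both the SSSE3 and the AVX2 blender
// compute, per byte, with f = source_y_fraction,
//   dst = (src * (256 - f) + src_next * f + 128) >> 8
// The psubb/vpsubb of 0x80 before pmaddubsw and the paddw/vpaddw of 0x8080
// afterwards re-bias the bytes so the signed multiply-add cannot overflow;
// f == 0 is a plain row copy and f == 128 uses pavgb/vpavgb.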
+ LABELALIGN + "1: \n" + "vmovdqu (%1),%%ymm0 \n" + "vmovdqu 0x00(%1,%4,1),%%ymm2 \n" + "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" + "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" + "vpsubb %%ymm4,%%ymm1,%%ymm1 \n" + "vpsubb %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n" + "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n" + "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" + "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%1,%0,1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "jmp 99f \n" - // Blend 100 / 0 - Copy row unchanged. - LABELALIGN - "100: \n" - "rep movsb " MEMMOVESTRING(1,0) " \n" - "jmp 999f \n" + // Blend 50 / 50. + LABELALIGN + "50: \n" + "vmovdqu (%1),%%ymm0 \n" + "vpavgb 0x00(%1,%4,1),%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%1,%0,1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 50b \n" + "jmp 99f \n" + + // Blend 100 / 0 - Copy row unchanged. + LABELALIGN + "100: \n" + "rep movsb \n" + "jmp 999f \n" - "99: \n" - "vzeroupper \n" - "999: \n" - : "+D"(dst_ptr), // %0 - "+S"(src_ptr), // %1 - "+cm"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" - ); + "99: \n" + "vzeroupper \n" + "999: \n" + : "+D"(dst_ptr), // %0 + "+S"(src_ptr), // %1 + "+cm"(dst_width), // %2 + "+r"(source_y_fraction) // %3 + : "r"((intptr_t)(src_stride)) // %4 + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"); } #endif // HAS_INTERPOLATEROW_AVX2 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { - asm volatile ( - "movdqu " MEMACCESS(3) ",%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(shuffler) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); + asm volatile( + + "movdqu (%3),%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(shuffler) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_ARGBSHUFFLEROW_SSSE3 #ifdef HAS_ARGBSHUFFLEROW_AVX2 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
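// Annotation (not part of the patch): `shuffler` is a pshufb control mask,
// so output byte i takes input byte shuffler[i] within each 16-byte
// (4-pixel) group, and one mask covers any of the channel reorders named
// above. For example, a mask swapping B and R while keeping alpha in place
// (ARGB <-> ABGR) would be
//   {2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15}
// The AVX2 version below broadcasts the same 16-byte mask to both lanes
// with vbroadcastf128.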
-void ARGBShuffleRow_AVX2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { - asm volatile ( - "vbroadcastf128 " MEMACCESS(3) ",%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(shuffler) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); + asm volatile( + + "vbroadcastf128 (%3),%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(shuffler) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_ARGBSHUFFLEROW_AVX2 #ifdef HAS_I422TOYUY2ROW_SSE2 -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, +void I422ToYUY2Row_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, int width) { - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(1) ",%%xmm2 \n" - MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3 - "lea " MEMLEA(0x8,1) ",%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(3) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,3) " \n" - "lea " MEMLEA(0x20,3) ",%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + asm volatile( + + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "add $0x10,%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,(%3) \n" + "movdqu %%xmm1,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_yuy2), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOYUY2ROW_SSE2 #ifdef HAS_I422TOUYVYROW_SSE2 -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, +void I422ToUYVYRow_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int width) { - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(1) ",%%xmm2 \n" - MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3 - "lea " MEMLEA(0x8,1) ",%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" 
- "movdqu %%xmm1," MEMACCESS(3) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,3) " \n" - "lea " MEMLEA(0x20,3) ",%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + asm volatile( + + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "add $0x10,%0 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,(%3) \n" + "movdqu %%xmm2,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_uyvy), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOUYVYROW_SSE2 +#ifdef HAS_I422TOYUY2ROW_AVX2 +void I422ToYUY2Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width) { + asm volatile( + + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" + "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_yuy2), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); +} +#endif // HAS_I422TOYUY2ROW_AVX2 + +#ifdef HAS_I422TOUYVYROW_AVX2 +void I422ToUYVYRow_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width) { + asm volatile( + + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" + "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_uyvy), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); +} +#endif // HAS_I422TOUYVYROW_AVX2 + #ifdef HAS_ARGBPOLYNOMIALROW_SSE2 -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, +void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width) { - asm volatile ( - "pxor %%xmm3,%%xmm3 \n" + asm volatile( - // 2 pixel loop. 
- LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm3,%%xmm0 \n" - "movdqa %%xmm0,%%xmm4 \n" - "punpcklwd %%xmm3,%%xmm0 \n" - "punpckhwd %%xmm3,%%xmm4 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "cvtdq2ps %%xmm4,%%xmm4 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm4,%%xmm5 \n" - "mulps " MEMACCESS2(0x10,3) ",%%xmm0 \n" - "mulps " MEMACCESS2(0x10,3) ",%%xmm4 \n" - "addps " MEMACCESS(3) ",%%xmm0 \n" - "addps " MEMACCESS(3) ",%%xmm4 \n" - "movdqa %%xmm1,%%xmm2 \n" - "movdqa %%xmm5,%%xmm6 \n" - "mulps %%xmm1,%%xmm2 \n" - "mulps %%xmm5,%%xmm6 \n" - "mulps %%xmm2,%%xmm1 \n" - "mulps %%xmm6,%%xmm5 \n" - "mulps " MEMACCESS2(0x20,3) ",%%xmm2 \n" - "mulps " MEMACCESS2(0x20,3) ",%%xmm6 \n" - "mulps " MEMACCESS2(0x30,3) ",%%xmm1 \n" - "mulps " MEMACCESS2(0x30,3) ",%%xmm5 \n" - "addps %%xmm2,%%xmm0 \n" - "addps %%xmm6,%%xmm4 \n" - "addps %%xmm1,%%xmm0 \n" - "addps %%xmm5,%%xmm4 \n" - "cvttps2dq %%xmm0,%%xmm0 \n" - "cvttps2dq %%xmm4,%%xmm4 \n" - "packuswb %%xmm4,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x2,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(poly) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + "pxor %%xmm3,%%xmm3 \n" + + // 2 pixel loop. + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm3,%%xmm0 \n" + "movdqa %%xmm0,%%xmm4 \n" + "punpcklwd %%xmm3,%%xmm0 \n" + "punpckhwd %%xmm3,%%xmm4 \n" + "cvtdq2ps %%xmm0,%%xmm0 \n" + "cvtdq2ps %%xmm4,%%xmm4 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm4,%%xmm5 \n" + "mulps 0x10(%3),%%xmm0 \n" + "mulps 0x10(%3),%%xmm4 \n" + "addps (%3),%%xmm0 \n" + "addps (%3),%%xmm4 \n" + "movdqa %%xmm1,%%xmm2 \n" + "movdqa %%xmm5,%%xmm6 \n" + "mulps %%xmm1,%%xmm2 \n" + "mulps %%xmm5,%%xmm6 \n" + "mulps %%xmm2,%%xmm1 \n" + "mulps %%xmm6,%%xmm5 \n" + "mulps 0x20(%3),%%xmm2 \n" + "mulps 0x20(%3),%%xmm6 \n" + "mulps 0x30(%3),%%xmm1 \n" + "mulps 0x30(%3),%%xmm5 \n" + "addps %%xmm2,%%xmm0 \n" + "addps %%xmm6,%%xmm4 \n" + "addps %%xmm1,%%xmm0 \n" + "addps %%xmm5,%%xmm4 \n" + "cvttps2dq %%xmm0,%%xmm0 \n" + "cvttps2dq %%xmm4,%%xmm4 \n" + "packuswb %%xmm4,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x2,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(poly) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBPOLYNOMIALROW_SSE2 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2 -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, +void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width) { - asm volatile ( - "vbroadcastf128 " MEMACCESS(3) ",%%ymm4 \n" - "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n" - "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n" - "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n" + asm volatile( + "vbroadcastf128 (%3),%%ymm4 \n" + "vbroadcastf128 0x10(%3),%%ymm5 \n" + "vbroadcastf128 0x20(%3),%%ymm6 \n" + "vbroadcastf128 0x30(%3),%%ymm7 \n" - // 2 pixel loop. 
- LABELALIGN - "1: \n" - "vpmovzxbd " MEMACCESS(0) ",%%ymm0 \n" // 2 ARGB pixels - "lea " MEMLEA(0x8,0) ",%0 \n" - "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats - "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X - "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X - "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X - "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X - "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * X - "vcvttps2dq %%ymm0,%%ymm0 \n" - "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n" - "vmovq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x2,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(poly) // %3 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 2 pixel loop. + LABELALIGN + "1: \n" + "vpmovzxbd (%0),%%ymm0 \n" // 2 ARGB pixels + "lea 0x8(%0),%0 \n" + "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats + "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X + "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X + "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X + "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X + "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * + // X + "vcvttps2dq %%ymm0,%%ymm0 \n" + "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n" + "vmovq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x2,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(poly) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBPOLYNOMIALROW_AVX2 #ifdef HAS_HALFFLOATROW_SSE2 static float kScaleBias = 1.9259299444e-34f; -void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { +void HalfFloatRow_SSE2(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { scale *= kScaleBias; - asm volatile ( - "movd %3,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - "sub %0,%1 \n" + asm volatile( + "movd %3,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" + "sub %0,%1 \n" - // 16 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" // 8 shorts - "add $0x10,%0 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/1 - "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats - "punpckhwd %%xmm5,%%xmm3 \n" - "cvtdq2ps %%xmm3,%%xmm3 \n" - "mulps %%xmm4,%%xmm2 \n" - "mulps %%xmm4,%%xmm3 \n" - "psrld $0xd,%%xmm2 \n" - "psrld $0xd,%%xmm3 \n" - "packssdw %%xmm3,%%xmm2 \n" - MEMOPMEM(movdqu,xmm2,-0x10,0,1,1) - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(scale) // %3 - : "memory", "cc", - "xmm2", "xmm3", "xmm4", "xmm5" - ); + // 16 pixel loop. 
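// Annotation (not part of the patch): kScaleBias is 2^-112, so after the
// multiply the IEEE-754 single's exponent already sits in the half-float
// range, and shifting the bit pattern right by 13 (psrld $0xd) drops the
// extra mantissa bits. One value in scalar form (hypothetical helper,
// needs <stdint.h> and <string.h>):
static uint16_t FloatToHalf_sketch(float prescaled) {  // v * scale * 2^-112
  uint32_t bits;
  memcpy(&bits, &prescaled, sizeof(bits));
  return (uint16_t)(bits >> 13);  // truncating round, like psrld $0xd
}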
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm2 \n" // 8 shorts + "add $0x10,%0 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/1 + "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats + "punpckhwd %%xmm5,%%xmm3 \n" + "cvtdq2ps %%xmm3,%%xmm3 \n" + "mulps %%xmm4,%%xmm2 \n" + "mulps %%xmm4,%%xmm3 \n" + "psrld $0xd,%%xmm2 \n" + "psrld $0xd,%%xmm3 \n" + "packssdw %%xmm3,%%xmm2 \n" + "movdqu %%xmm2,-0x10(%0,%1,1) \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(scale) // %3 + : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_HALFFLOATROW_SSE2 #ifdef HAS_HALFFLOATROW_AVX2 -void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { +void HalfFloatRow_AVX2(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { scale *= kScaleBias; - asm volatile ( - "vbroadcastss %3, %%ymm4 \n" - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" - "sub %0,%1 \n" + asm volatile( + "vbroadcastss %3, %%ymm4 \n" + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + "sub %0,%1 \n" - // 16 pixel loop. - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts - "add $0x20,%0 \n" - "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates - "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n" - "vcvtdq2ps %%ymm3,%%ymm3 \n" - "vcvtdq2ps %%ymm2,%%ymm2 \n" - "vmulps %%ymm3,%%ymm4,%%ymm3 \n" - "vmulps %%ymm2,%%ymm4,%%ymm2 \n" - "vpsrld $0xd,%%ymm3,%%ymm3 \n" - "vpsrld $0xd,%%ymm2,%%ymm2 \n" - "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates - MEMOPMEM(vmovdqu,ymm2,-0x20,0,1,1) - "sub $0x10,%2 \n" - "jg 1b \n" + // 16 pixel loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm2 \n" // 16 shorts + "add $0x20,%0 \n" + "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates + "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n" + "vcvtdq2ps %%ymm3,%%ymm3 \n" + "vcvtdq2ps %%ymm2,%%ymm2 \n" + "vmulps %%ymm3,%%ymm4,%%ymm3 \n" + "vmulps %%ymm2,%%ymm4,%%ymm2 \n" + "vpsrld $0xd,%%ymm3,%%ymm3 \n" + "vpsrld $0xd,%%ymm2,%%ymm2 \n" + "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates + "vmovdqu %%ymm2,-0x20(%0,%1,1) \n" + "sub $0x10,%2 \n" + "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 #if defined(__x86_64__) - : "x"(scale) // %3 + : "x"(scale) // %3 #else - : "m"(scale) // %3 + : "m"(scale) // %3 #endif - : "memory", "cc", - "xmm2", "xmm3", "xmm4", "xmm5" - ); + : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_HALFFLOATROW_AVX2 #ifdef HAS_HALFFLOATROW_F16C -void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { - asm volatile ( - "vbroadcastss %3, %%ymm4 \n" - "sub %0,%1 \n" +void HalfFloatRow_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + asm volatile( + "vbroadcastss %3, %%ymm4 \n" + "sub %0,%1 \n" - // 16 pixel loop. - LABELALIGN - "1: \n" - "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints - "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" - "vcvtdq2ps %%ymm2,%%ymm2 \n" - "vcvtdq2ps %%ymm3,%%ymm3 \n" - "vmulps %%ymm2,%%ymm4,%%ymm2 \n" - "vmulps %%ymm3,%%ymm4,%%ymm3 \n" - "vcvtps2ph $3, %%ymm2, %%xmm2 \n" - "vcvtps2ph $3, %%ymm3, %%xmm3 \n" - MEMOPMEM(vmovdqu,xmm2,0x00,0,1,1) - MEMOPMEM(vmovdqu,xmm3,0x10,0,1,1) - "add $0x20,%0 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 + // 16 pixel loop. 
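// Annotation (not part of the patch): the same conversion using the
// hardware F16C instruction; vcvtps2ph with imm8 = 3 selects
// round-toward-zero, matching the truncating shift in the SSE2/AVX2
// bit-twiddling paths above.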
+ LABELALIGN + "1: \n" + "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints + "vpmovzxwd 0x10(%0),%%ymm3 \n" + "vcvtdq2ps %%ymm2,%%ymm2 \n" + "vcvtdq2ps %%ymm3,%%ymm3 \n" + "vmulps %%ymm2,%%ymm4,%%ymm2 \n" + "vmulps %%ymm3,%%ymm4,%%ymm3 \n" + "vcvtps2ph $3, %%ymm2, %%xmm2 \n" + "vcvtps2ph $3, %%ymm3, %%xmm3 \n" + "vmovdqu %%xmm2,0x00(%0,%1,1) \n" + "vmovdqu %%xmm3,0x10(%0,%1,1) \n" + "add $0x20,%0 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 #if defined(__x86_64__) - : "x"(scale) // %3 + : "x"(scale) // %3 #else - : "m"(scale) // %3 + : "m"(scale) // %3 #endif - : "memory", "cc", - "xmm2", "xmm3", "xmm4" - ); + : "memory", "cc", "xmm2", "xmm3", "xmm4"); } #endif // HAS_HALFFLOATROW_F16C #ifdef HAS_HALFFLOATROW_F16C -void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { - asm volatile ( - "sub %0,%1 \n" - // 16 pixel loop. - LABELALIGN - "1: \n" - "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints - "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" - "vcvtdq2ps %%ymm2,%%ymm2 \n" - "vcvtdq2ps %%ymm3,%%ymm3 \n" - "vcvtps2ph $3, %%ymm2, %%xmm2 \n" - "vcvtps2ph $3, %%ymm3, %%xmm3 \n" - MEMOPMEM(vmovdqu,xmm2,0x00,0,1,1) - MEMOPMEM(vmovdqu,xmm3,0x10,0,1,1) - "add $0x20,%0 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc", - "xmm2", "xmm3" - ); +void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float, int width) { + asm volatile( + "sub %0,%1 \n" + // 16 pixel loop. + LABELALIGN + "1: \n" + "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints + "vpmovzxwd 0x10(%0),%%ymm3 \n" + "vcvtdq2ps %%ymm2,%%ymm2 \n" + "vcvtdq2ps %%ymm3,%%ymm3 \n" + "vcvtps2ph $3, %%ymm2, %%xmm2 \n" + "vcvtps2ph $3, %%ymm3, %%xmm3 \n" + "vmovdqu %%xmm2,0x00(%0,%1,1) \n" + "vmovdqu %%xmm3,0x10(%0,%1,1) \n" + "add $0x20,%0 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm2", "xmm3"); } #endif // HAS_HALFFLOATROW_F16C #ifdef HAS_ARGBCOLORTABLEROW_X86 // Tranform ARGB pixels with color table. -void ARGBColorTableRow_X86(uint8* dst_argb, - const uint8* table_argb, +void ARGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, int width) { uintptr_t pixel_temp; - asm volatile ( - // 1 pixel loop. - LABELALIGN - "1: \n" - "movzb " MEMACCESS(0) ",%1 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x4,0) " \n" - "movzb " MEMACCESS2(-0x3,0) ",%1 \n" - MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x3,0) " \n" - "movzb " MEMACCESS2(-0x2,0) ",%1 \n" - MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x2,0) " \n" - "movzb " MEMACCESS2(-0x1,0) ",%1 \n" - MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x1,0) " \n" - "dec %2 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "=&d"(pixel_temp), // %1 - "+r"(width) // %2 - : "r"(table_argb) // %3 - : "memory", "cc"); + asm volatile( + // 1 pixel loop. 
+ LABELALIGN + "1: \n" + "movzb (%0),%1 \n" + "lea 0x4(%0),%0 \n" + "movzb 0x00(%3,%1,4),%1 \n" + "mov %b1,-0x4(%0) \n" + "movzb -0x3(%0),%1 \n" + "movzb 0x01(%3,%1,4),%1 \n" + "mov %b1,-0x3(%0) \n" + "movzb -0x2(%0),%1 \n" + "movzb 0x02(%3,%1,4),%1 \n" + "mov %b1,-0x2(%0) \n" + "movzb -0x1(%0),%1 \n" + "movzb 0x03(%3,%1,4),%1 \n" + "mov %b1,-0x1(%0) \n" + "dec %2 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "=&d"(pixel_temp), // %1 + "+r"(width) // %2 + : "r"(table_argb) // %3 + : "memory", "cc"); } #endif // HAS_ARGBCOLORTABLEROW_X86 #ifdef HAS_RGBCOLORTABLEROW_X86 // Tranform RGB pixels with color table. -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { +void RGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { uintptr_t pixel_temp; - asm volatile ( - // 1 pixel loop. - LABELALIGN - "1: \n" - "movzb " MEMACCESS(0) ",%1 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x4,0) " \n" - "movzb " MEMACCESS2(-0x3,0) ",%1 \n" - MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x3,0) " \n" - "movzb " MEMACCESS2(-0x2,0) ",%1 \n" - MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x2,0) " \n" - "dec %2 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "=&d"(pixel_temp), // %1 - "+r"(width) // %2 - : "r"(table_argb) // %3 - : "memory", "cc"); + asm volatile( + // 1 pixel loop. + LABELALIGN + "1: \n" + "movzb (%0),%1 \n" + "lea 0x4(%0),%0 \n" + "movzb 0x00(%3,%1,4),%1 \n" + "mov %b1,-0x4(%0) \n" + "movzb -0x3(%0),%1 \n" + "movzb 0x01(%3,%1,4),%1 \n" + "mov %b1,-0x3(%0) \n" + "movzb -0x2(%0),%1 \n" + "movzb 0x02(%3,%1,4),%1 \n" + "mov %b1,-0x2(%0) \n" + "dec %2 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "=&d"(pixel_temp), // %1 + "+r"(width) // %2 + : "r"(table_argb) // %3 + : "memory", "cc"); } #endif // HAS_RGBCOLORTABLEROW_X86 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Tranform RGB pixels with luma table. -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, +void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, - uint32 lumacoeff) { + const uint8_t* luma, + uint32_t lumacoeff) { uintptr_t pixel_temp; uintptr_t table_temp; - asm volatile ( - "movd %6,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0x8,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" + asm volatile( + "movd %6,%%xmm3 \n" + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $0x8,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(2) ",%%xmm0 \n" - "pmaddubsw %%xmm3,%%xmm0 \n" - "phaddw %%xmm0,%%xmm0 \n" - "pand %%xmm4,%%xmm0 \n" - "punpcklwd %%xmm5,%%xmm0 \n" - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - - "movzb " MEMACCESS(2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS(3) " \n" - "movzb " MEMACCESS2(0x1,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x1,3) " \n" - "movzb " MEMACCESS2(0x2,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x2,3) " \n" - "movzb " MEMACCESS2(0x3,2) ",%0 \n" - "mov %b0," MEMACCESS2(0x3,3) " \n" - - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - - "movzb " MEMACCESS2(0x4,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x4,3) " \n" - "movzb " MEMACCESS2(0x5,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x5,3) " \n" - "movzb " MEMACCESS2(0x6,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x6,3) " \n" - "movzb " MEMACCESS2(0x7,2) ",%0 \n" - "mov %b0," MEMACCESS2(0x7,3) " \n" - - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - - "movzb " MEMACCESS2(0x8,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x8,3) " \n" - "movzb " MEMACCESS2(0x9,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x9,3) " \n" - "movzb " MEMACCESS2(0xa,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xa,3) " \n" - "movzb " MEMACCESS2(0xb,2) ",%0 \n" - "mov %b0," MEMACCESS2(0xb,3) " \n" - - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - - "movzb " MEMACCESS2(0xc,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xc,3) " \n" - "movzb " MEMACCESS2(0xd,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xd,3) " \n" - "movzb " MEMACCESS2(0xe,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xe,3) " \n" - "movzb " MEMACCESS2(0xf,2) ",%0 \n" - "mov %b0," MEMACCESS2(0xf,3) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "lea " MEMLEA(0x10,3) ",%3 \n" - "sub $0x4,%4 \n" - "jg 1b \n" - : "=&d"(pixel_temp), // %0 - "=&a"(table_temp), // %1 - "+r"(src_argb), // %2 - "+r"(dst_argb), // %3 - "+rm"(width) // %4 - : "r"(luma), // %5 - "rm"(lumacoeff) // %6 - : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5" - ); + // 4 pixel loop. 
+ LABELALIGN + "1: \n" + "movdqu (%2),%%xmm0 \n" + "pmaddubsw %%xmm3,%%xmm0 \n" + "phaddw %%xmm0,%%xmm0 \n" + "pand %%xmm4,%%xmm0 \n" + "punpcklwd %%xmm5,%%xmm0 \n" + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + + "movzb (%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,(%3) \n" + "movzb 0x1(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x1(%3) \n" + "movzb 0x2(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x2(%3) \n" + "movzb 0x3(%2),%0 \n" + "mov %b0,0x3(%3) \n" + + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + + "movzb 0x4(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x4(%3) \n" + "movzb 0x5(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x5(%3) \n" + "movzb 0x6(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x6(%3) \n" + "movzb 0x7(%2),%0 \n" + "mov %b0,0x7(%3) \n" + + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + + "movzb 0x8(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x8(%3) \n" + "movzb 0x9(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x9(%3) \n" + "movzb 0xa(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xa(%3) \n" + "movzb 0xb(%2),%0 \n" + "mov %b0,0xb(%3) \n" + + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" + + "movzb 0xc(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xc(%3) \n" + "movzb 0xd(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xd(%3) \n" + "movzb 0xe(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xe(%3) \n" + "movzb 0xf(%2),%0 \n" + "mov %b0,0xf(%3) \n" + "lea 0x10(%2),%2 \n" + "lea 0x10(%3),%3 \n" + "sub $0x4,%4 \n" + "jg 1b \n" + : "=&d"(pixel_temp), // %0 + "=&a"(table_temp), // %1 + "+r"(src_argb), // %2 + "+r"(dst_argb), // %3 + "+rm"(width) // %4 + : "r"(luma), // %5 + "rm"(lumacoeff) // %6 + : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 diff --git a/chromium/third_party/libyuv/source/row_msa.cc b/chromium/third_party/libyuv/source/row_msa.cc index 5cc23450a52..66666cefcd9 100644 --- a/chromium/third_party/libyuv/source/row_msa.cc +++ b/chromium/third_party/libyuv/source/row_msa.cc @@ -37,17 +37,17 @@ extern "C" { } // Load YUV 422 pixel data -#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ - { \ - uint64 y_m; \ - uint32 u_m, v_m; \ - v4i32 zero_m = {0}; \ - y_m = LD(psrc_y); \ - u_m = LW(psrc_u); \ - v_m = LW(psrc_v); \ - out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64)y_m); \ - out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32)u_m); \ - out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32)v_m); \ +#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ + { \ + uint64_t y_m; \ + uint32_t u_m, v_m; \ + v4i32 zero_m = {0}; \ + y_m = LD(psrc_y); \ + u_m = LW(psrc_u); \ + v_m = LW(psrc_v); \ + out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \ + out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m); \ + out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m); \ } // Clip input vector elements between 0 to 255 @@ -275,17 +275,17 @@ extern "C" { // Load I444 pixel data #define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ { \ - uint64 y_m, u_m, v_m; \ + uint64_t y_m, u_m, v_m; \ v2i64 zero_m = {0}; \ y_m = LD(psrc_y); \ u_m = LD(psrc_u); \ v_m = LD(psrc_v); \ - out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64)y_m); \ - out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64)u_m); \ - out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64)v_m); \ + out_y = 
(v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m); \ + out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m); \ + out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m); \ } -void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { +void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { int x; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; @@ -302,7 +302,7 @@ void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { } } -void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { int x; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; @@ -319,10 +319,10 @@ void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) { } } -void I422ToYUY2Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, int width) { int x; v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; @@ -343,10 +343,10 @@ void I422ToYUY2Row_MSA(const uint8* src_y, } } -void I422ToUYVYRow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToUYVYRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int width) { int x; v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; @@ -367,10 +367,10 @@ void I422ToUYVYRow_MSA(const uint8* src_y, } } -void I422ToARGBRow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; @@ -390,18 +390,18 @@ void I422ToARGBRow_MSA(const uint8* src_y, src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); - STOREARGB(vec0, vec1, vec2, alpha, rgb_buf); + STOREARGB(vec0, vec1, vec2, alpha, dst_argb); src_y += 8; src_u += 4; src_v += 4; - rgb_buf += 32; + dst_argb += 32; } } -void I422ToRGBARow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToRGBARow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; @@ -421,23 +421,23 @@ void I422ToRGBARow_MSA(const uint8* src_y, src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); - STOREARGB(alpha, vec0, vec1, vec2, rgb_buf); + STOREARGB(alpha, vec0, vec1, vec2, dst_argb); src_y += 8; src_u += 4; src_v += 4; - rgb_buf += 32; + dst_argb += 32; } } -void I422AlphaToARGBRow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* rgb_buf, +void I422AlphaToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; - int64 data_a; + int64_t data_a; v16u8 src0, src1, src2, src3; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; @@ -457,23 +457,23 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y, YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3); - STOREARGB(vec0, vec1, vec2, 
src3, rgb_buf); + STOREARGB(vec0, vec1, vec2, src3, dst_argb); src_y += 8; src_u += 4; src_v += 4; src_a += 8; - rgb_buf += 32; + dst_argb += 32; } } -void I422ToRGB24Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToRGB24Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, - int32 width) { + int32_t width) { int x; - int64 data_u, data_v; + int64_t data_u, data_v; v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; v8i16 vec0, vec1, vec2, vec3, vec4, vec5; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; @@ -510,20 +510,20 @@ void I422ToRGB24Row_MSA(const uint8* src_y, dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0); dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1); dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2); - ST_UB2(dst0, dst1, rgb_buf, 16); - ST_UB(dst2, (rgb_buf + 32)); + ST_UB2(dst0, dst1, dst_argb, 16); + ST_UB(dst2, (dst_argb + 32)); src_y += 16; src_u += 8; src_v += 8; - rgb_buf += 48; + dst_argb += 48; } } // TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R. -void I422ToRGB565Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { int x; @@ -558,10 +558,10 @@ void I422ToRGB565Row_MSA(const uint8* src_y, } // TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G. -void I422ToARGB4444Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { int x; @@ -598,10 +598,10 @@ void I422ToARGB4444Row_MSA(const uint8* src_y, } } -void I422ToARGB1555Row_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { int x; @@ -638,7 +638,7 @@ void I422ToARGB1555Row_MSA(const uint8* src_y, } } -void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { +void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -652,12 +652,12 @@ void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { } } -void YUY2ToUVRow_MSA(const uint8* src_yuy2, +void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, int src_stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_yuy2_next = src_yuy2 + src_stride_yuy2; + const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 vec0, vec1, dst0, dst1; @@ -682,9 +682,9 @@ void YUY2ToUVRow_MSA(const uint8* src_yuy2, } } -void YUY2ToUV422Row_MSA(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -703,7 +703,7 @@ void YUY2ToUV422Row_MSA(const uint8* src_yuy2, } } -void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width) { +void 
UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -717,12 +717,12 @@ void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width) { } } -void UYVYToUVRow_MSA(const uint8* src_uyvy, +void UYVYToUVRow_MSA(const uint8_t* src_uyvy, int src_stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_uyvy_next = src_uyvy + src_stride_uyvy; + const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 vec0, vec1, dst0, dst1; @@ -747,9 +747,9 @@ void UYVYToUVRow_MSA(const uint8* src_uyvy, } } -void UYVYToUV422Row_MSA(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -768,7 +768,7 @@ void UYVYToUV422Row_MSA(const uint8* src_uyvy, } } -void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { +void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; @@ -814,13 +814,13 @@ void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { } } -void ARGBToUVRow_MSA(const uint8* src_argb0, +void ARGBToUVRow_MSA(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint8* src_argb0_next = src_argb0 + src_stride_argb; + const uint8_t* src_argb0_next = src_argb0 + src_stride_argb; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; @@ -932,7 +932,7 @@ void ARGBToUVRow_MSA(const uint8* src_argb0, } } -void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1, dst2; v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20}; @@ -956,7 +956,7 @@ void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1, dst2; v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22}; @@ -980,7 +980,7 @@ void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1, dst0; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; @@ -1014,7 +1014,9 @@ void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { int x; v16u8 src0, src1, dst0; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; @@ -1054,7 +1056,9 @@ void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, + 
uint8_t* dst_rgb, + int width) { int x; v16u8 src0, src1; v16u8 vec0, vec1; @@ -1077,11 +1081,11 @@ void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToUV444Row_MSA(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, - int32 width) { - int32 x; +void ARGBToUV444Row_MSA(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int32_t width) { + int32_t x; v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 vec8, vec9, vec10, vec11; @@ -1149,9 +1153,9 @@ void ARGBToUV444Row_MSA(const uint8* src_argb, } } -void ARGBMultiplyRow_MSA(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBMultiplyRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, dst0; @@ -1188,9 +1192,9 @@ void ARGBMultiplyRow_MSA(const uint8* src_argb0, } } -void ARGBAddRow_MSA(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBAddRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -1209,9 +1213,9 @@ void ARGBAddRow_MSA(const uint8* src_argb0, } } -void ARGBSubtractRow_MSA(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -1230,7 +1234,9 @@ void ARGBSubtractRow_MSA(const uint8* src_argb0, } } -void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBAttenuateRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { int x; v16u8 src0, src1, dst0, dst1; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; @@ -1295,9 +1301,9 @@ void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { } } -void ARGBToRGB565DitherRow_MSA(const uint8* src_argb, - uint8* dst_rgb, - uint32 dither4, +void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + uint32_t dither4, int width) { int x; v16u8 src0, src1, dst0, vec0, vec1; @@ -1339,15 +1345,15 @@ void ARGBToRGB565DitherRow_MSA(const uint8* src_argb, } } -void ARGBShuffleRow_MSA(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { int x; v16u8 src0, src1, dst0, dst1; v16i8 vec0; v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; - int32 val = LW((int32*)shuffler); + int32_t val = LW((int32_t*)shuffler); vec0 = (v16i8)__msa_fill_w(val); shuffler_vec += vec0; @@ -1363,10 +1369,10 @@ void ARGBShuffleRow_MSA(const uint8* src_argb, } } -void ARGBShadeRow_MSA(const uint8* src_argb, - uint8* dst_argb, +void ARGBShadeRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value) { + uint32_t value) { int x; v16u8 src0, dst0; v8u16 vec0, vec1; @@ -1402,7 +1408,7 @@ void ARGBShadeRow_MSA(const uint8* src_argb, } } -void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, vec0, vec1, dst0, dst1; v8u16 reg0; @@ -1427,7 +1433,7 @@ void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { } } -void ARGBSepiaRow_MSA(uint8* dst_argb, int width) { +void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width) { 
int x; v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5; v8u16 reg0, reg1, reg2; @@ -1468,8 +1474,8 @@ void ARGBSepiaRow_MSA(uint8* dst_argb, int width) { } } -void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { int x; v16u8 src0, src1; @@ -1497,8 +1503,8 @@ void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, } } -void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { int x; v8u16 src0, src1; @@ -1547,7 +1553,9 @@ void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555, } } -void RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width) { +void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { int x; v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; @@ -1592,7 +1600,9 @@ void RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width) { } } -void RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width) { +void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { int x; v16u8 src0, src1, src2; v16u8 vec0, vec1, vec2; @@ -1617,7 +1627,7 @@ void RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width) { } } -void RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width) { +void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2; v16u8 vec0, vec1, vec2; @@ -1642,7 +1652,9 @@ void RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width) { } } -void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width) { +void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width) { int x; v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; @@ -1699,7 +1711,7 @@ void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width) { } } -void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width) { +void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; @@ -1762,7 +1774,7 @@ void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width) { } } -void RGB24ToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { +void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; v8u16 vec0, vec1, vec2, vec3; @@ -1803,7 +1815,7 @@ void RGB24ToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { } } -void RAWToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { +void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; v8u16 vec0, vec1, vec2, vec3; @@ -1844,14 +1856,14 @@ void RAWToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { } } -void ARGB1555ToUVRow_MSA(const uint8* src_argb1555, +void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint16* s = (const uint16*)src_argb1555; - const uint16* t = (const uint16*)(src_argb1555 + src_stride_argb1555); + const uint16_t* s = (const uint16_t*)src_argb1555; + const 
uint16_t* t = (const uint16_t*)(src_argb1555 + src_stride_argb1555); int64_t res0, res1; v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; @@ -1925,14 +1937,14 @@ void ARGB1555ToUVRow_MSA(const uint8* src_argb1555, } } -void RGB565ToUVRow_MSA(const uint8* src_rgb565, +void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint16* s = (const uint16*)src_rgb565; - const uint16* t = (const uint16*)(src_rgb565 + src_stride_rgb565); + const uint16_t* s = (const uint16_t*)src_rgb565; + const uint16_t* t = (const uint16_t*)(src_rgb565 + src_stride_rgb565); int64_t res0, res1; v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; v8u16 vec0, vec1, vec2, vec3, vec4, vec5; @@ -2005,15 +2017,15 @@ void RGB565ToUVRow_MSA(const uint8* src_rgb565, } } -void RGB24ToUVRow_MSA(const uint8* src_rgb0, +void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint8* s = src_rgb0; - const uint8* t = src_rgb0 + src_stride_rgb; - int64 res0, res1; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + int64_t res0, res1; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 inp0, inp1, inp2, inp3, inp4, inp5; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; @@ -2110,15 +2122,15 @@ void RGB24ToUVRow_MSA(const uint8* src_rgb0, } } -void RAWToUVRow_MSA(const uint8* src_rgb0, +void RAWToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint8* s = src_rgb0; - const uint8* t = src_rgb0 + src_stride_rgb; - int64 res0, res1; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + int64_t res0, res1; v16u8 inp0, inp1, inp2, inp3, inp4, inp5; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; @@ -2215,13 +2227,13 @@ void RAWToUVRow_MSA(const uint8* src_rgb0, } } -void NV12ToARGBRow_MSA(const uint8* src_y, - const uint8* src_uv, - uint8* rgb_buf, +void NV12ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; - uint64 val0, val1; + uint64_t val0, val1; v16u8 src0, src1, res0, res1, dst0, dst1; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; @@ -2245,20 +2257,20 @@ void NV12ToARGBRow_MSA(const uint8* src_y, res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); - ST_UB2(dst0, dst1, rgb_buf, 16); + ST_UB2(dst0, dst1, dst_argb, 16); src_y += 8; src_uv += 8; - rgb_buf += 32; + dst_argb += 32; } } -void NV12ToRGB565Row_MSA(const uint8* src_y, - const uint8* src_uv, - uint8* rgb_buf, +void NV12ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { int x; - uint64 val0, val1; + uint64_t val0, val1; v16u8 src0, src1, dst0; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; @@ -2281,20 +2293,20 @@ void NV12ToRGB565Row_MSA(const uint8* src_y, vec1 = (vec1 >> 2) << 5; vec2 = (vec2 >> 3) << 11; dst0 = (v16u8)(vec0 | vec1 | vec2); - ST_UB(dst0, rgb_buf); + ST_UB(dst0, dst_rgb565); src_y 
+= 8; src_uv += 8; - rgb_buf += 16; + dst_rgb565 += 16; } } -void NV21ToARGBRow_MSA(const uint8* src_y, - const uint8* src_vu, - uint8* rgb_buf, +void NV21ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; - uint64 val0, val1; + uint64_t val0, val1; v16u8 src0, src1, res0, res1, dst0, dst1; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; @@ -2320,16 +2332,16 @@ void NV21ToARGBRow_MSA(const uint8* src_y, res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); - ST_UB2(dst0, dst1, rgb_buf, 16); + ST_UB2(dst0, dst1, dst_argb, 16); src_y += 8; src_vu += 8; - rgb_buf += 32; + dst_argb += 32; } } -void SobelRow_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3; @@ -2355,9 +2367,9 @@ void SobelRow_MSA(const uint8* src_sobelx, } } -void SobelToPlaneRow_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -2376,9 +2388,9 @@ void SobelToPlaneRow_MSA(const uint8* src_sobelx, } } -void SobelXYRow_MSA(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, vec0, vec1, vec2; @@ -2404,7 +2416,7 @@ void SobelXYRow_MSA(const uint8* src_sobelx, } } -void ARGBToYJRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { +void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); @@ -2424,7 +2436,7 @@ void ARGBToYJRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { } } -void BGRAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { +void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200); @@ -2444,7 +2456,7 @@ void BGRAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { } } -void ABGRToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { +void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142); @@ -2464,7 +2476,7 @@ void ABGRToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { } } -void RGBAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { +void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900); @@ -2484,14 +2496,14 @@ void RGBAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { } } -void ARGBToUVJRow_MSA(const uint8* src_rgb0, +void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint8* s = src_rgb0; - const uint8* t = src_rgb0 + src_stride_rgb; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 src0, src1, src2, 
src3, src4, src5, src6, src7; v16u8 vec0, vec1, vec2, vec3; v16u8 dst0, dst1; @@ -2554,14 +2566,14 @@ void ARGBToUVJRow_MSA(const uint8* src_rgb0, } } -void BGRAToUVRow_MSA(const uint8* src_rgb0, +void BGRAToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint8* s = src_rgb0; - const uint8* t = src_rgb0 + src_stride_rgb; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 dst0, dst1, vec0, vec1, vec2, vec3; v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, @@ -2587,14 +2599,14 @@ void BGRAToUVRow_MSA(const uint8* src_rgb0, } } -void ABGRToUVRow_MSA(const uint8* src_rgb0, +void ABGRToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint8* s = src_rgb0; - const uint8* t = src_rgb0 + src_stride_rgb; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1; v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; @@ -2621,14 +2633,14 @@ void ABGRToUVRow_MSA(const uint8* src_rgb0, } } -void RGBAToUVRow_MSA(const uint8* src_rgb0, +void RGBAToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; - const uint8* s = src_rgb0; - const uint8* t = src_rgb0 + src_stride_rgb; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 dst0, dst1, vec0, vec1, vec2, vec3; v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, @@ -2654,10 +2666,10 @@ void RGBAToUVRow_MSA(const uint8* src_rgb0, } } -void I444ToARGBRow_MSA(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I444ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; @@ -2714,15 +2726,15 @@ void I444ToARGBRow_MSA(const uint8* src_y, vec1 = (v8u16)__msa_ilvev_b((v16i8)alpha, (v16i8)vec2); dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0); dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0); - ST_UB2(dst0, dst1, rgb_buf, 16); + ST_UB2(dst0, dst1, dst_argb, 16); src_y += 8; src_u += 8; src_v += 8; - rgb_buf += 32; + dst_argb += 32; } } -void I400ToARGBRow_MSA(const uint8* src_y, uint8* rgb_buf, int width) { +void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { int x; v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3; v8i16 vec0, vec1; @@ -2768,13 +2780,13 @@ void I400ToARGBRow_MSA(const uint8* src_y, uint8* rgb_buf, int width) { dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1); dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, (v16i8)res2); dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2); - ST_UB4(dst0, dst1, dst2, dst3, rgb_buf, 16); + ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_y += 16; - rgb_buf += 64; + dst_argb += 64; } } -void J400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width) { +void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { int x; v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); @@ -2795,8 +2807,8 @@ void J400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width) { 
} } -void YUY2ToARGBRow_MSA(const uint8* src_yuy2, - uint8* rgb_buf, +void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; @@ -2817,14 +2829,14 @@ void YUY2ToARGBRow_MSA(const uint8* src_yuy2, src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); - STOREARGB(vec0, vec1, vec2, alpha, rgb_buf); + STOREARGB(vec0, vec1, vec2, alpha, dst_argb); src_yuy2 += 16; - rgb_buf += 32; + dst_argb += 32; } } -void UYVYToARGBRow_MSA(const uint8* src_uyvy, - uint8* rgb_buf, +void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; @@ -2845,22 +2857,22 @@ void UYVYToARGBRow_MSA(const uint8* src_uyvy, src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); - STOREARGB(vec0, vec1, vec2, alpha, rgb_buf); + STOREARGB(vec0, vec1, vec2, alpha, dst_argb); src_uyvy += 16; - rgb_buf += 32; + dst_argb += 32; } } -void InterpolateRow_MSA(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int width, - int32 source_y_fraction) { - int32 y1_fraction = source_y_fraction; - int32 y0_fraction = 256 - y1_fraction; - uint16 y_fractions; - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; + int32_t source_y_fraction) { + int32_t y1_fraction = source_y_fraction; + int32_t y0_fraction = 256 - y1_fraction; + uint16_t y_fractions; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; v16u8 src0, src1, src2, src3, dst0, dst1; v8u16 vec0, vec1, vec2, vec3, y_frac; @@ -2886,7 +2898,7 @@ void InterpolateRow_MSA(uint8* dst_ptr, return; } - y_fractions = (uint16)(y0_fraction + (y1_fraction << 8)); + y_fractions = (uint16_t)(y0_fraction + (y1_fraction << 8)); y_frac = (v8u16)__msa_fill_h(y_fractions); for (x = 0; x < width; x += 32) { @@ -2915,7 +2927,7 @@ void InterpolateRow_MSA(uint8* dst_ptr, } } -void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int width) { +void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width) { int x; v4i32 dst0 = __builtin_msa_fill_w(v32); @@ -2925,7 +2937,7 @@ void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int width) { } } -void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width) { +void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { int x; v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; v16i8 shuffler0 = {2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17}; @@ -2950,9 +2962,9 @@ void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width) { } } -void MergeUVRow_MSA(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_MSA(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { int x; v16u8 src0, src1, dst0, dst1; @@ -2969,7 +2981,9 @@ void MergeUVRow_MSA(const uint8* src_u, } } -void ARGBExtractAlphaRow_MSA(const uint8* src_argb, uint8* dst_a, int width) { +void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { int i; v16u8 src0, src1, src2, src3, vec0, vec1, dst0; @@ -2987,9 +3001,9 @@ void ARGBExtractAlphaRow_MSA(const uint8* src_argb, uint8* dst_a, int width) { } } -void ARGBBlendRow_MSA(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBBlendRow_MSA(const uint8_t* src_argb0, + 
const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; @@ -3052,7 +3066,7 @@ void ARGBBlendRow_MSA(const uint8* src_argb0, } } -void ARGBQuantizeRow_MSA(uint8* dst_argb, +void ARGBQuantizeRow_MSA(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, @@ -3158,11 +3172,11 @@ void ARGBQuantizeRow_MSA(uint8* dst_argb, } } -void ARGBColorMatrixRow_MSA(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width) { - int32 x; + int32_t x; v16i8 src0; v16u8 src1, src2, dst0, dst1; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; @@ -3267,9 +3281,9 @@ void ARGBColorMatrixRow_MSA(const uint8* src_argb, } } -void SplitUVRow_MSA(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; @@ -3291,7 +3305,7 @@ void SplitUVRow_MSA(const uint8* src_uv, } } -void SetRow_MSA(uint8* dst, uint8 v8, int width) { +void SetRow_MSA(uint8_t* dst, uint8_t v8, int width) { int x; v16u8 dst0 = (v16u8)__msa_fill_b(v8); @@ -3301,9 +3315,9 @@ void SetRow_MSA(uint8* dst, uint8 v8, int width) { } } -void MirrorUVRow_MSA(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void MirrorUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3; @@ -3330,11 +3344,11 @@ void MirrorUVRow_MSA(const uint8* src_uv, } } -void SobelXRow_MSA(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, - int32 width) { +void SobelXRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int32_t width) { int x; v16u8 src0, src1, src2, src3, src4, src5, dst0; v8i16 vec0, vec1, vec2, vec3, vec4, vec5; @@ -3384,10 +3398,10 @@ void SobelXRow_MSA(const uint8* src_y0, } } -void SobelYRow_MSA(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, - int32 width) { +void SobelYRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int32_t width) { int x; v16u8 src0, src1, dst0; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; @@ -3429,7 +3443,10 @@ void SobelYRow_MSA(const uint8* src_y0, } } -void HalfFloatRow_MSA(const uint16* src, uint16* dst, float scale, int width) { +void HalfFloatRow_MSA(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { int i; v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3; v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; diff --git a/chromium/third_party/libyuv/source/row_neon.cc b/chromium/third_party/libyuv/source/row_neon.cc index 1af828622cd..93a3497d275 100644 --- a/chromium/third_party/libyuv/source/row_neon.cc +++ b/chromium/third_party/libyuv/source/row_neon.cc @@ -106,10 +106,10 @@ extern "C" { "vqshrun.s16 d22, q9, #6 \n" /* R */ \ "vqshrun.s16 d21, q0, #6 \n" /* G */ -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -132,10 +132,10 @@ void I444ToARGBRow_NEON(const uint8* src_y, "q12", "q13", "q14", "q15"); } -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void 
I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -158,11 +158,11 @@ void I422ToARGBRow_NEON(const uint8* src_y, "q12", "q13", "q14", "q15"); } -void I422AlphaToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, +void I422AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -186,10 +186,10 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y, "q12", "q13", "q14", "q15"); } -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -213,10 +213,10 @@ void I422ToRGBARow_NEON(const uint8* src_y, "q12", "q13", "q14", "q15"); } -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -245,10 +245,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y, "vsri.16 q0, q8, #5 \n" /* RG */ \ "vsri.16 q0, q9, #11 \n" /* RGB */ -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -279,10 +279,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y, "vsri.16 q0, q9, #6 \n" /* ARG */ \ "vsri.16 q0, q10, #11 \n" /* ARGB */ -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -315,10 +315,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, "vorr d1, d22, d23 \n" /* RA */ \ "vzip.u8 d0, d1 \n" /* BGRA */ -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -344,7 +344,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, "q12", "q13", "q14", "q15"); } -void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { +void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( YUVTORGB_SETUP "vmov.u8 d23, #255 \n" @@ -363,7 +363,7 @@ void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { "q12", "q13", "q14", "q15"); } -void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { +void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d23, #255 \n" "1: \n" @@ -380,9 +380,9 @@ void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { : "cc", "memory", "d20", "d21", "d22", "d23"); } -void NV12ToARGBRow_NEON(const uint8* 
src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP @@ -403,9 +403,9 @@ void NV12ToARGBRow_NEON(const uint8* src_y, "q10", "q11", "q12", "q13", "q14", "q15"); } -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP @@ -426,9 +426,9 @@ void NV21ToARGBRow_NEON(const uint8* src_y, "q10", "q11", "q12", "q13", "q14", "q15"); } -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -449,8 +449,8 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, "q12", "q13", "q14", "q15"); } -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP @@ -470,8 +470,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, "q10", "q11", "q12", "q13", "q14", "q15"); } -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP @@ -492,9 +492,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, } // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. -void SplitUVRow_NEON(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "1: \n" @@ -513,9 +513,9 @@ void SplitUVRow_NEON(const uint8* src_uv, } // Reads 16 U's and V's and writes out 16 pairs of UV. -void MergeUVRow_NEON(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_NEON(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { asm volatile( "1: \n" @@ -534,10 +534,10 @@ void MergeUVRow_NEON(const uint8* src_u, } // Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. -void SplitRGBRow_NEON(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_NEON(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width) { asm volatile( "1: \n" @@ -559,10 +559,10 @@ void SplitRGBRow_NEON(const uint8* src_rgb, } // Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time -void MergeRGBRow_NEON(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width) { asm volatile( "1: \n" @@ -584,7 +584,7 @@ void MergeRGBRow_NEON(const uint8* src_r, } // Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. -void CopyRow_NEON(const uint8* src, uint8* dst, int count) { +void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "1: \n" "vld1.8 {d0, d1, d2, d3}, [%0]! 
\n" // load 32 @@ -593,14 +593,14 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 - "+r"(count) // %2 // Output registers + "+r"(width) // %2 // Output registers : // Input registers : "cc", "memory", "q0", "q1" // Clobber List ); } -// SetRow writes 'count' bytes using an 8 bit value repeated. -void SetRow_NEON(uint8* dst, uint8 v8, int count) { +// SetRow writes 'width' bytes using an 8 bit value repeated. +void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) { asm volatile( "vdup.8 q0, %2 \n" // duplicate 16 bytes "1: \n" @@ -608,13 +608,13 @@ void SetRow_NEON(uint8* dst, uint8 v8, int count) { "vst1.8 {q0}, [%0]! \n" // store "bgt 1b \n" : "+r"(dst), // %0 - "+r"(count) // %1 + "+r"(width) // %1 : "r"(v8) // %2 : "cc", "memory", "q0"); } -// ARGBSetRow writes 'count' pixels using an 32 bit value repeated. -void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { +// ARGBSetRow writes 'width' pixels using an 32 bit value repeated. +void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) { asm volatile( "vdup.u32 q0, %2 \n" // duplicate 4 ints "1: \n" @@ -622,12 +622,12 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { "vst1.8 {q0}, [%0]! \n" // store "bgt 1b \n" : "+r"(dst), // %0 - "+r"(count) // %1 + "+r"(width) // %1 : "r"(v32) // %2 : "cc", "memory", "q0"); } -void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { +void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. "mov r3, #-16 \n" @@ -648,9 +648,9 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { : "cc", "memory", "r3", "q0"); } -void MirrorUVRow_NEON(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void MirrorUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( // Start at end of source row. @@ -673,7 +673,7 @@ void MirrorUVRow_NEON(const uint8* src_uv, : "cc", "memory", "r12", "q0"); } -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. "mov r3, #-16 \n" @@ -694,7 +694,9 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { : "cc", "memory", "r3", "q0"); } -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { +void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { asm volatile( "vmov.u8 d4, #255 \n" // Alpha "1: \n" @@ -710,7 +712,7 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { ); } -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { +void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d4, #255 \n" // Alpha "1: \n" @@ -727,7 +729,7 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { ); } -void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { +void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. 
@@ -756,7 +758,9 @@ void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { "vorr.u8 d2, d1, d5 \n" /* R */ \ "vorr.u8 d1, d4, d6 \n" /* G */ -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { +void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { asm volatile( "vmov.u8 d3, #255 \n" // Alpha "1: \n" @@ -800,8 +804,8 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { "vorr.u8 d2, d1, d5 \n" /* R */ \ "vorr.u8 d1, d4, d6 \n" /* G */ -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // Alpha @@ -829,8 +833,8 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // Alpha @@ -848,7 +852,9 @@ void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, ); } -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { +void ARGBToRGB24Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb24, + int width) { asm volatile( "1: \n" "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. @@ -864,7 +870,7 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { ); } -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { +void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { asm volatile( "1: \n" "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. @@ -880,7 +886,7 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { ); } -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { +void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { asm volatile( "1: \n" "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. @@ -895,7 +901,7 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { ); } -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { +void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { asm volatile( "1: \n" "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. 
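// (illustrative note, not part of the patch) vld2.8 de-interleaves the packed
// stream two ways: even-numbered bytes go to q0 and odd-numbered bytes to q1.
// For UYVY (U0 Y0 V0 Y1 ...) the Ys are the odd bytes, so only q1 is kept;
// YUY2ToYRow above is the mirror case, where the even bytes are the Ys.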
@@ -910,9 +916,9 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { ); } -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "1: \n" @@ -930,9 +936,9 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, ); } -void UYVYToUV422Row_NEON(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "1: \n" @@ -950,10 +956,10 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, ); } -void YUY2ToUVRow_NEON(const uint8* src_yuy2, +void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // stride + src_yuy2 @@ -977,10 +983,10 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, ); } -void UYVYToUVRow_NEON(const uint8* src_uyvy, +void UYVYToUVRow_NEON(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // stride + src_uyvy @@ -1005,9 +1011,9 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_NEON(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { asm volatile( "vld1.8 {q2}, [%3] \n" // shuffler @@ -1026,10 +1032,10 @@ void ARGBShuffleRow_NEON(const uint8* src_argb, ); } -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, int width) { asm volatile( "1: \n" @@ -1048,10 +1054,10 @@ void I422ToYUY2Row_NEON(const uint8* src_y, : "cc", "memory", "d0", "d1", "d2", "d3"); } -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToUYVYRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int width) { asm volatile( "1: \n" @@ -1070,7 +1076,9 @@ void I422ToUYVYRow_NEON(const uint8* src_y, : "cc", "memory", "d0", "d1", "d2", "d3"); } -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { +void ARGBToRGB565Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb565, + int width) { asm volatile( "1: \n" "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. 
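// (illustrative note, not part of the patch) The pack that follows the load
// truncates each channel into the 5:6:5 layout; in scalar terms:
//   rgb565 = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
// The DitherRow variant further down adds dither4 per pixel before packing.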
@@ -1085,9 +1093,9 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } -void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width) { asm volatile( "vdup.32 d2, %2 \n" // dither4 @@ -1107,8 +1115,8 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11"); } -void ARGBToARGB1555Row_NEON(const uint8* src_argb, - uint8* dst_argb1555, +void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb1555, int width) { asm volatile( "1: \n" @@ -1124,8 +1132,8 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb, : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } -void ARGBToARGB4444Row_NEON(const uint8* src_argb, - uint8* dst_argb4444, +void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb4444, int width) { asm volatile( "vmov.u8 d4, #0x0f \n" // bits to clear with @@ -1143,7 +1151,7 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { +void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient @@ -1166,7 +1174,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { : "cc", "memory", "q0", "q1", "q2", "q12", "q13"); } -void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { +void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { asm volatile( "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels @@ -1182,7 +1192,7 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { ); } -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { +void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient @@ -1204,9 +1214,9 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { } // 8x1 pixels. -void ARGBToUV444Row_NEON(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "vmov.u8 d24, #112 \n" // UB / VR 0.875 @@ -1260,10 +1270,10 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, // clang-format on // TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. -void ARGBToUVRow_NEON(const uint8* src_argb, +void ARGBToUVRow_NEON(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_argb @@ -1306,10 +1316,10 @@ void ARGBToUVRow_NEON(const uint8* src_argb, } // TODO(fbarchard): Subsample match C code. 
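// (illustrative sketch, not part of the patch) Scalar form of the fixed-point
// BT.601 U/V math the NEON constants above encode: 112 = 0.875 * 128, and
// 0x8080 is the +128.5 bias in 8.8 fixed point. Helper names are
// hypothetical; the ARGBToUVJRow variant below uses full-range JPEG
// coefficients instead.
static inline uint8_t RGBToU_Sketch(int r, int g, int b) {
  return (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static inline uint8_t RGBToV_Sketch(int r, int g, int b) {
  return (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}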
-void ARGBToUVJRow_NEON(const uint8* src_argb, +void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_argb @@ -1351,10 +1361,10 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, ); } -void BGRAToUVRow_NEON(const uint8* src_bgra, +void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_bgra @@ -1396,10 +1406,10 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, ); } -void ABGRToUVRow_NEON(const uint8* src_abgr, +void ABGRToUVRow_NEON(const uint8_t* src_abgr, int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_abgr @@ -1441,10 +1451,10 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, ); } -void RGBAToUVRow_NEON(const uint8* src_rgba, +void RGBAToUVRow_NEON(const uint8_t* src_rgba, int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_rgba @@ -1486,10 +1496,10 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, ); } -void RGB24ToUVRow_NEON(const uint8* src_rgb24, +void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_rgb24 @@ -1531,10 +1541,10 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, ); } -void RAWToUVRow_NEON(const uint8* src_raw, +void RAWToUVRow_NEON(const uint8_t* src_raw, int src_stride_raw, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_raw @@ -1577,10 +1587,10 @@ void RAWToUVRow_NEON(const uint8* src_raw, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void RGB565ToUVRow_NEON(const uint8* src_rgb565, +void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // src_stride + src_argb @@ -1643,10 +1653,10 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, +void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // src_stride + src_argb @@ -1709,10 +1719,10 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
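// (illustrative sketch, not part of the patch) The "16x2 -> 8x1" step in
// these UV readers, including ARGB4444ToUVRow below, is a 2x2 box average
// per channel ahead of the U/V dot products; roughly, per channel:
static inline uint8_t Box2x2_Sketch(uint8_t a, uint8_t b, uint8_t c,
                                    uint8_t d) {
  return (uint8_t)((a + b + c + d + 2) >> 2);  // average, rounded to nearest
}
// The NEON path keeps the sums at double scale and halves the coefficients
// instead, which is one reason its rounding can drift from the C reference
// (see the "Subsample match C code" TODOs).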
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, +void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // src_stride + src_argb @@ -1774,7 +1784,7 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { +void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient @@ -1798,7 +1808,9 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { +void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient @@ -1822,7 +1834,9 @@ void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { +void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_y, + int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient @@ -1846,7 +1860,7 @@ void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { +void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient @@ -1869,7 +1883,7 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { +void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient @@ -1892,7 +1906,7 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { +void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient @@ -1915,7 +1929,7 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { +void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient @@ -1938,7 +1952,7 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { +void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { asm 
volatile( "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient @@ -1962,8 +1976,8 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { } // Bilinear filter 16x2 -> 16x1 -void InterpolateRow_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -2021,9 +2035,9 @@ void InterpolateRow_NEON(uint8* dst_ptr, } // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr -void ARGBBlendRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBBlendRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( "subs %3, #8 \n" @@ -2081,7 +2095,9 @@ void ARGBBlendRow_NEON(const uint8* src_argb0, } // Attenuate 8 pixels at a time. -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBAttenuateRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { asm volatile( // Attenuate 8 pixels. "1: \n" @@ -2104,7 +2120,7 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { // Quantize 8 ARGB pixels (32 bytes). // dst = (dst * scale >> 16) * interval_size + interval_offset; -void ARGBQuantizeRow_NEON(uint8* dst_argb, +void ARGBQuantizeRow_NEON(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, @@ -2147,10 +2163,10 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, // Shade 8 pixels at a time by specified value. // NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. // Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. -void ARGBShadeRow_NEON(const uint8* src_argb, - uint8* dst_argb, +void ARGBShadeRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value) { + uint32_t value) { asm volatile( "vdup.u32 q0, %3 \n" // duplicate scale value. "vzip.u8 d0, d1 \n" // d0 aarrggbb. @@ -2184,7 +2200,7 @@ void ARGBShadeRow_NEON(const uint8* src_argb, // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels // Similar to ARGBToYJ but stores ARGB. // C code is (15 * b + 75 * g + 38 * r + 64) >> 7; -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient @@ -2211,7 +2227,7 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { // b = (r * 35 + g * 68 + b * 17) >> 7 // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 -void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { +void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d20, #17 \n" // BB coefficient "vmov.u8 d21, #68 \n" // BG coefficient @@ -2249,9 +2265,9 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { // Tranform 8 ARGB pixels (32 bytes) with color matrix. // TODO(fbarchard): Was same as Sepia except matrix is provided. This function // needs to saturate. Consider doing a non-saturating version. -void ARGBColorMatrixRow_NEON(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width) { asm volatile( "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. 
@@ -2308,9 +2324,9 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, } // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBMultiplyRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. @@ -2337,9 +2353,9 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, } // Add 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBAddRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBAddRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. @@ -2360,9 +2376,9 @@ void ARGBAddRow_NEON(const uint8* src_argb0, } // Subtract 2 rows of ARGB pixels, 8 pixels at a time. -void ARGBSubtractRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. @@ -2387,9 +2403,9 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, // R = Sobel // G = Sobel // B = Sobel -void SobelRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // alpha @@ -2412,9 +2428,9 @@ void SobelRow_NEON(const uint8* src_sobelx, } // Adds Sobel X and Sobel Y and stores Sobel into plane. -void SobelToPlaneRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width) { asm volatile( // 16 pixel loop. @@ -2438,9 +2454,9 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, // R = Sobel X // G = Sobel // B = Sobel Y -void SobelXYRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // alpha @@ -2464,10 +2480,10 @@ void SobelXYRow_NEON(const uint8* src_sobelx, // -1 0 1 // -2 0 2 // -1 0 1 -void SobelXRow_NEON(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width) { asm volatile( "1: \n" @@ -2503,9 +2519,9 @@ void SobelXRow_NEON(const uint8* src_y0, // -1 -2 -1 // 0 0 0 // 1 2 1 -void SobelYRow_NEON(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width) { asm volatile( "1: \n" @@ -2536,7 +2552,10 @@ void SobelYRow_NEON(const uint8* src_y0, ); } -void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { +void HalfFloat1Row_NEON(const uint16_t* src, + uint16_t* dst, + float /*unused*/, + int width) { asm volatile( "vdup.32 q0, %3 \n" @@ -2561,7 +2580,10 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { } // TODO(fbarchard): multiply by element. 
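// (illustrative sketch, not part of the patch) One way to express the
// truncating float -> half conversion the scaled rows perform: multiplying by
// 2^-112 rebiases the float32 exponent (bias 127) to the float16 bias (15),
// after which the top bits, shifted down by 13, are the half-float pattern.
// Assumes the scaled value lands in half's positive normal range; the helper
// name is hypothetical.
#include <stdint.h>
#include <string.h>
static inline uint16_t ScaleToHalf_Sketch(uint16_t src, float scale) {
  float v = (float)src * scale * 1.9259299444e-34f;  // scale * 2^-112
  uint32_t bits;
  memcpy(&bits, &v, sizeof(bits));
  return (uint16_t)(bits >> 13);  // truncates the mantissa (no rounding)
}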
-void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { +void HalfFloatRow_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { asm volatile( "vdup.32 q0, %3 \n" diff --git a/chromium/third_party/libyuv/source/row_neon64.cc b/chromium/third_party/libyuv/source/row_neon64.cc index 5616d8a5b5f..e7b8b5c1dd9 100644 --- a/chromium/third_party/libyuv/source/row_neon64.cc +++ b/chromium/third_party/libyuv/source/row_neon64.cc @@ -112,10 +112,10 @@ extern "C" { ".8h, #6 \n" /* G */ \ "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -141,10 +141,10 @@ void I444ToARGBRow_NEON(const uint8* src_y, ); } -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -170,11 +170,11 @@ void I422ToARGBRow_NEON(const uint8* src_y, ); } -void I422AlphaToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, +void I422AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -201,10 +201,10 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y, ); } -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -230,10 +230,10 @@ void I422ToRGBARow_NEON(const uint8* src_y, ); } -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -265,10 +265,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y, "sri v0.8h, v21.8h, #5 \n" /* RG */ \ "sri v0.8h, v20.8h, #11 \n" /* RGB */ -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -301,10 +301,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y, "sri v0.8h, v21.8h, #6 \n" /* ARG */ \ "sri v0.8h, v20.8h, #11 \n" /* ARGB */ -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -339,10 +339,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \ "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */ -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* 
dst_argb4444, +void I422ToARGB4444Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -370,7 +370,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, ); } -void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { +void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" @@ -392,7 +392,7 @@ void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { ); } -void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { +void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( "movi v23.8b, #255 \n" "1: \n" @@ -409,9 +409,9 @@ void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { : "cc", "memory", "v20", "v21", "v22", "v23"); } -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -436,9 +436,9 @@ void NV12ToARGBRow_NEON(const uint8* src_y, ); } -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -463,9 +463,9 @@ void NV21ToARGBRow_NEON(const uint8* src_y, ); } -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( @@ -488,8 +488,8 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"); } -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -513,8 +513,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, ); } -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -539,9 +539,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, } // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. -void SplitUVRow_NEON(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void SplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "1: \n" @@ -560,9 +560,9 @@ void SplitUVRow_NEON(const uint8* src_uv, } // Reads 16 U's and V's and writes out 16 pairs of UV. -void MergeUVRow_NEON(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +void MergeUVRow_NEON(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { asm volatile( "1: \n" @@ -581,10 +581,10 @@ void MergeUVRow_NEON(const uint8* src_u, } // Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. 
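// (illustrative sketch, not part of the patch) Scalar equivalent of the
// SplitRGBRow below; the NEON version moves 16 pixels per iteration with a
// single de-interleaving ld3.
#include <stdint.h>
static void SplitRGBRow_Sketch(const uint8_t* src_rgb, uint8_t* dst_r,
                               uint8_t* dst_g, uint8_t* dst_b, int width) {
  for (int x = 0; x < width; ++x) {
    dst_r[x] = src_rgb[0];
    dst_g[x] = src_rgb[1];
    dst_b[x] = src_rgb[2];
    src_rgb += 3;
  }
}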
-void SplitRGBRow_NEON(const uint8* src_rgb, - uint8* dst_r, - uint8* dst_g, - uint8* dst_b, +void SplitRGBRow_NEON(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width) { asm volatile( "1: \n" @@ -605,10 +605,10 @@ void SplitRGBRow_NEON(const uint8* src_rgb, } // Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time -void MergeRGBRow_NEON(const uint8* src_r, - const uint8* src_g, - const uint8* src_b, - uint8* dst_rgb, +void MergeRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, int width) { asm volatile( "1: \n" @@ -629,7 +629,7 @@ void MergeRGBRow_NEON(const uint8* src_r, } // Copy multiple of 32. -void CopyRow_NEON(const uint8* src, uint8* dst, int count) { +void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "1: \n" "ldp q0, q1, [%0], #32 \n" @@ -638,14 +638,14 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 - "+r"(count) // %2 // Output registers + "+r"(width) // %2 // Output registers : // Input registers : "cc", "memory", "v0", "v1" // Clobber List ); } -// SetRow writes 'count' bytes using an 8 bit value repeated. -void SetRow_NEON(uint8* dst, uint8 v8, int count) { +// SetRow writes 'width' bytes using an 8 bit value repeated. +void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) { asm volatile( "dup v0.16b, %w2 \n" // duplicate 16 bytes "1: \n" @@ -653,12 +653,12 @@ void SetRow_NEON(uint8* dst, uint8 v8, int count) { "st1 {v0.16b}, [%0], #16 \n" // store "b.gt 1b \n" : "+r"(dst), // %0 - "+r"(count) // %1 + "+r"(width) // %1 : "r"(v8) // %2 : "cc", "memory", "v0"); } -void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { +void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) { asm volatile( "dup v0.4s, %w2 \n" // duplicate 4 ints "1: \n" @@ -666,12 +666,12 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { "st1 {v0.16b}, [%0], #16 \n" // store "b.gt 1b \n" : "+r"(dst), // %0 - "+r"(count) // %1 + "+r"(width) // %1 : "r"(v32) // %2 : "cc", "memory", "v0"); } -void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { +void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. "add %0, %0, %w2, sxtw \n" @@ -690,9 +690,9 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { : "cc", "memory", "v0"); } -void MirrorUVRow_NEON(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +void MirrorUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( // Start at end of source row. @@ -714,7 +714,7 @@ void MirrorUVRow_NEON(const uint8* src_uv, : "cc", "memory", "v0", "v1"); } -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. 
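// (illustrative note, not part of the patch) %w2 holds the pixel width;
// "sxtw #2" sign-extends it and shifts left by 2 (4 bytes per ARGB pixel),
// so the add below parks %0 at the end of the row before the loop walks it
// backwards, reversing each vector as it goes.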
"add %0, %0, %w2, sxtw #2 \n" @@ -733,7 +733,9 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { : "cc", "memory", "v0"); } -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { +void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { asm volatile( "movi v4.8b, #255 \n" // Alpha "1: \n" @@ -749,7 +751,7 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { ); } -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { +void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { asm volatile( "movi v5.8b, #255 \n" // Alpha "1: \n" @@ -767,7 +769,7 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { ); } -void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { +void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b @@ -797,7 +799,9 @@ void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \ "dup v2.2D, v0.D[1] \n" /* R */ -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { +void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { asm volatile( "movi v3.8b, #255 \n" // Alpha "1: \n" @@ -851,8 +855,8 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { "orr v2.16b, v1.16b, v3.16b \n" /* R */ \ "dup v1.2D, v0.D[1] \n" /* G */ -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, - uint8* dst_argb, +void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { asm volatile( "movi v3.8b, #255 \n" // Alpha @@ -883,8 +887,8 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, "dup v0.2D, v2.D[1] \n" \ "dup v1.2D, v3.D[1] \n" -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, - uint8* dst_argb, +void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { asm volatile( "1: \n" @@ -902,7 +906,9 @@ void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, ); } -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { +void ARGBToRGB24Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb24, + int width) { asm volatile( "1: \n" "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB @@ -918,7 +924,7 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { ); } -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { +void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { asm volatile( "1: \n" "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a @@ -935,7 +941,7 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { ); } -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { +void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { asm volatile( "1: \n" "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2. @@ -950,7 +956,7 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { ); } -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { +void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { asm volatile( "1: \n" "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY. 
@@ -965,9 +971,9 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { ); } -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "1: \n" @@ -985,9 +991,9 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, ); } -void UYVYToUV422Row_NEON(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "1: \n" @@ -1005,12 +1011,12 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, ); } -void YUY2ToUVRow_NEON(const uint8* src_yuy2, +void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_yuy2b = src_yuy2 + stride_yuy2; + const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2; asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels @@ -1032,12 +1038,12 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, ); } -void UYVYToUVRow_NEON(const uint8* src_uyvy, +void UYVYToUVRow_NEON(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_uyvyb = src_uyvy + stride_uyvy; + const uint8_t* src_uyvyb = src_uyvy + stride_uyvy; asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels @@ -1060,9 +1066,9 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_NEON(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +void ARGBShuffleRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { asm volatile( "ld1 {v2.16b}, [%3] \n" // shuffler @@ -1080,10 +1086,10 @@ void ARGBShuffleRow_NEON(const uint8* src_argb, ); } -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, +void I422ToYUY2Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, int width) { asm volatile( "1: \n" @@ -1103,10 +1109,10 @@ void I422ToYUY2Row_NEON(const uint8* src_y, : "cc", "memory", "v0", "v1", "v2", "v3"); } -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, +void I422ToUYVYRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, int width) { asm volatile( "1: \n" @@ -1126,7 +1132,9 @@ void I422ToUYVYRow_NEON(const uint8* src_y, : "cc", "memory", "v0", "v1", "v2", "v3"); } -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { +void ARGBToRGB565Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb565, + int width) { asm volatile( "1: \n" "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels @@ -1141,9 +1149,9 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { : "cc", "memory", "v0", "v20", "v21", "v22", "v23"); } -void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width) { asm volatile( "dup v1.4s, %w2 \n" // dither4 @@ -1162,8 +1170,8 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23"); } -void ARGBToARGB1555Row_NEON(const uint8* 
src_argb, - uint8* dst_argb1555, +void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb1555, int width) { asm volatile( "1: \n" @@ -1180,8 +1188,8 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb, : "cc", "memory", "v0", "v20", "v21", "v22", "v23"); } -void ARGBToARGB4444Row_NEON(const uint8* src_argb, - uint8* dst_argb4444, +void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb4444, int width) { asm volatile( "movi v4.16b, #0x0f \n" // bits to clear with @@ -1200,7 +1208,7 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23"); } -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { +void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient @@ -1223,7 +1231,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } -void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { +void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { asm volatile( "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 @@ -1239,7 +1249,7 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { ); } -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { +void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #15 \n" // B * 0.11400 coefficient "movi v5.8b, #75 \n" // G * 0.58700 coefficient @@ -1261,9 +1271,9 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { } // 8x1 pixels. -void ARGBToUV444Row_NEON(const uint8* src_argb, - uint8* dst_u, - uint8* dst_v, +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width) { asm volatile( "movi v24.8b, #112 \n" // UB / VR 0.875 @@ -1328,12 +1338,12 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, // TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. // TODO(fbarchard): consider ptrdiff_t for all strides. -void ARGBToUVRow_NEON(const uint8* src_argb, +void ARGBToUVRow_NEON(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_argb_1 = src_argb + src_stride_argb; + const uint8_t* src_argb_1 = src_argb + src_stride_argb; asm volatile ( RGBTOUV_SETUP_REG "1: \n" @@ -1368,12 +1378,12 @@ void ARGBToUVRow_NEON(const uint8* src_argb, } // TODO(fbarchard): Subsample match C code. 
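// (illustrative sketch, not part of the patch) Per channel, the A64 UV
// readers subsample with a pairwise-add, accumulate, rounding-shift sequence
// (uaddlp/uadalp/urshr #1): a rounded half-sum of each 2x2 block, i.e. twice
// the box average. That is why the setup registers hold coefficients at half
// scale ("(0.875) / 2" and similar below). Scalar form of the subsample, with
// a hypothetical helper:
static inline uint16_t HalfSum2x2_Sketch(uint8_t a, uint8_t b, uint8_t c,
                                         uint8_t d) {
  return (uint16_t)((a + b + c + d + 1) >> 1);  // 2x the 2x2 average, rounded
}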
-void ARGBToUVJRow_NEON(const uint8* src_argb, +void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_argb_1 = src_argb + src_stride_argb; + const uint8_t* src_argb_1 = src_argb + src_stride_argb; asm volatile ( "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2 "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2 @@ -1411,12 +1421,12 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, ); } -void BGRAToUVRow_NEON(const uint8* src_bgra, +void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_bgra_1 = src_bgra + src_stride_bgra; + const uint8_t* src_bgra_1 = src_bgra + src_stride_bgra; asm volatile ( RGBTOUV_SETUP_REG "1: \n" @@ -1449,12 +1459,12 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, ); } -void ABGRToUVRow_NEON(const uint8* src_abgr, +void ABGRToUVRow_NEON(const uint8_t* src_abgr, int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_abgr_1 = src_abgr + src_stride_abgr; + const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; asm volatile ( RGBTOUV_SETUP_REG "1: \n" @@ -1487,12 +1497,12 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, ); } -void RGBAToUVRow_NEON(const uint8* src_rgba, +void RGBAToUVRow_NEON(const uint8_t* src_rgba, int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_rgba_1 = src_rgba + src_stride_rgba; + const uint8_t* src_rgba_1 = src_rgba + src_stride_rgba; asm volatile ( RGBTOUV_SETUP_REG "1: \n" @@ -1525,12 +1535,12 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, ); } -void RGB24ToUVRow_NEON(const uint8* src_rgb24, +void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24; + const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24; asm volatile ( RGBTOUV_SETUP_REG "1: \n" @@ -1563,12 +1573,12 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, ); } -void RAWToUVRow_NEON(const uint8* src_raw, +void RAWToUVRow_NEON(const uint8_t* src_raw, int src_stride_raw, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_raw_1 = src_raw + src_stride_raw; + const uint8_t* src_raw_1 = src_raw + src_stride_raw; asm volatile ( RGBTOUV_SETUP_REG "1: \n" @@ -1602,12 +1612,12 @@ void RAWToUVRow_NEON(const uint8* src_raw, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void RGB565ToUVRow_NEON(const uint8* src_rgb565, +void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565; + const uint8_t* src_rgb565_1 = src_rgb565 + src_stride_rgb565; asm volatile( "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / // 2 @@ -1673,12 +1683,12 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
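// (illustrative sketch, not part of the patch) Field layout handled by these
// 16-bit readers: RGB565 is B = bits 0-4, G = bits 5-10, R = bits 11-15;
// ARGB1555 below is B = 0-4, G = 5-9, R = 10-14, A = bit 15. Scalar
// expansion of one RGB565 pixel, replicating top bits so 0x1f maps to 0xff:
#include <stdint.h>
static inline void UnpackRGB565_Sketch(uint16_t p, uint8_t* b, uint8_t* g,
                                       uint8_t* r) {
  uint8_t b5 = (uint8_t)(p & 0x1f);
  uint8_t g6 = (uint8_t)((p >> 5) & 0x3f);
  uint8_t r5 = (uint8_t)(p >> 11);
  *b = (uint8_t)((b5 << 3) | (b5 >> 2));
  *g = (uint8_t)((g6 << 2) | (g6 >> 4));
  *r = (uint8_t)((r5 << 3) | (r5 >> 2));
}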
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, +void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555; + const uint8_t* src_argb1555_1 = src_argb1555 + src_stride_argb1555; asm volatile( RGBTOUV_SETUP_REG "1: \n" @@ -1738,12 +1748,12 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, +void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444; + const uint8_t* src_argb4444_1 = src_argb4444 + src_stride_argb4444; asm volatile( RGBTOUV_SETUP_REG "1: \n" @@ -1804,7 +1814,7 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, ); } -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { +void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { asm volatile( "movi v24.8b, #13 \n" // B * 0.1016 coefficient "movi v25.8b, #65 \n" // G * 0.5078 coefficient @@ -1829,7 +1839,9 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { "v27"); } -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { +void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient @@ -1853,7 +1865,9 @@ void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { +void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_y, + int width) { asm volatile( "movi v24.8b, #13 \n" // B * 0.1016 coefficient "movi v25.8b, #65 \n" // G * 0.5078 coefficient @@ -1877,7 +1891,7 @@ void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27"); } -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { +void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #33 \n" // R * 0.2578 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient @@ -1900,7 +1914,7 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { +void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #33 \n" // R * 0.2578 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient @@ -1923,7 +1937,7 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { +void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient @@ -1946,7 +1960,7 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); 
} -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { +void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient @@ -1969,7 +1983,7 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { +void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #33 \n" // R * 0.2578 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient @@ -1993,14 +2007,14 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { } // Bilinear filter 16x2 -> 16x1 -void InterpolateRow_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void InterpolateRow_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr1 = src_ptr + src_stride; asm volatile( "cmp %w4, #0 \n" "b.eq 100f \n" @@ -2053,9 +2067,9 @@ void InterpolateRow_NEON(uint8* dst_ptr, } // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr -void ARGBBlendRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBBlendRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( "subs %w3, %w3, #8 \n" @@ -2121,7 +2135,9 @@ void ARGBBlendRow_NEON(const uint8* src_argb0, } // Attenuate 8 pixels at a time. -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBAttenuateRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { asm volatile( // Attenuate 8 pixels. "1: \n" @@ -2145,7 +2161,7 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { // Quantize 8 ARGB pixels (32 bytes). // dst = (dst * scale >> 16) * interval_size + interval_offset; -void ARGBQuantizeRow_NEON(uint8* dst_argb, +void ARGBQuantizeRow_NEON(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, @@ -2188,10 +2204,10 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, // Shade 8 pixels at a time by specified value. // NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. // Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. -void ARGBShadeRow_NEON(const uint8* src_argb, - uint8* dst_argb, +void ARGBShadeRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value) { + uint32_t value) { asm volatile( "dup v0.4s, %w3 \n" // duplicate scale value. "zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb. @@ -2225,7 +2241,7 @@ void ARGBShadeRow_NEON(const uint8* src_argb, // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels // Similar to ARGBToYJ but stores ARGB. 
// C code is (15 * b + 75 * g + 38 * r + 64) >> 7; -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( "movi v24.8b, #15 \n" // B * 0.11400 coefficient "movi v25.8b, #75 \n" // G * 0.58700 coefficient @@ -2253,7 +2269,7 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 -void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { +void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) { asm volatile( "movi v20.8b, #17 \n" // BB coefficient "movi v21.8b, #68 \n" // BG coefficient @@ -2291,9 +2307,9 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { // Tranform 8 ARGB pixels (32 bytes) with color matrix. // TODO(fbarchard): Was same as Sepia except matrix is provided. This function // needs to saturate. Consider doing a non-saturating version. -void ARGBColorMatrixRow_NEON(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width) { asm volatile( "ld1 {v2.16b}, [%3] \n" // load 3 ARGB vectors. @@ -2351,9 +2367,9 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, // TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBMultiplyRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. @@ -2380,9 +2396,9 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, } // Add 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBAddRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBAddRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. @@ -2405,9 +2421,9 @@ void ARGBAddRow_NEON(const uint8* src_argb0, } // Subtract 2 rows of ARGB pixels, 8 pixels at a time. -void ARGBSubtractRow_NEON(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +void ARGBSubtractRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. @@ -2434,9 +2450,9 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, // R = Sobel // G = Sobel // B = Sobel -void SobelRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { asm volatile( "movi v3.8b, #255 \n" // alpha @@ -2459,9 +2475,9 @@ void SobelRow_NEON(const uint8* src_sobelx, } // Adds Sobel X and Sobel Y and stores Sobel into plane. -void SobelToPlaneRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width) { asm volatile( // 16 pixel loop. 
@@ -2485,9 +2501,9 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, // R = Sobel X // G = Sobel // B = Sobel Y -void SobelXYRow_NEON(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +void SobelXYRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { asm volatile( "movi v3.8b, #255 \n" // alpha @@ -2511,10 +2527,10 @@ void SobelXYRow_NEON(const uint8* src_sobelx, // -1 0 1 // -2 0 2 // -1 0 1 -void SobelXRow_NEON(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +void SobelXRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width) { asm volatile( "1: \n" @@ -2550,9 +2566,9 @@ void SobelXRow_NEON(const uint8* src_y0, // -1 -2 -1 // 0 0 0 // 1 2 1 -void SobelYRow_NEON(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +void SobelYRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width) { asm volatile( "1: \n" @@ -2584,7 +2600,10 @@ void SobelYRow_NEON(const uint8* src_y0, } // Caveat - rounds float to half float whereas scaling version truncates. -void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { +void HalfFloat1Row_NEON(const uint16_t* src, + uint16_t* dst, + float /*unused*/, + int width) { asm volatile( "1: \n" "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts @@ -2604,7 +2623,10 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { : "cc", "memory", "v1", "v2", "v3"); } -void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { +void HalfFloatRow_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { asm volatile( "1: \n" "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts @@ -2702,12 +2724,12 @@ void ScaleSamples_NEON(const float* src, float* dst, float scale, int width) { } // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. -void GaussCol_NEON(const uint16* src0, - const uint16* src1, - const uint16* src2, - const uint16* src3, - const uint16* src4, - uint32* dst, +void GaussCol_NEON(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, int width) { asm volatile( "movi v6.8h, #4 \n" // constant 4 @@ -2742,10 +2764,10 @@ void GaussCol_NEON(const uint16* src0, } // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. -void GaussRow_NEON(const uint32* src, uint16* dst, int width) { - const uint32* src1 = src + 1; - const uint32* src2 = src + 2; - const uint32* src3 = src + 3; +void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) { + const uint32_t* src1 = src + 1; + const uint32_t* src2 = src + 2; + const uint32_t* src3 = src + 3; asm volatile( "movi v6.4s, #4 \n" // constant 4 "movi v7.4s, #6 \n" // constant 6 diff --git a/chromium/third_party/libyuv/source/row_win.cc b/chromium/third_party/libyuv/source/row_win.cc index 596d7df739e..5500d7f5a64 100644 --- a/chromium/third_party/libyuv/source/row_win.cc +++ b/chromium/third_party/libyuv/source/row_win.cc @@ -28,27 +28,27 @@ extern "C" { #if defined(_M_X64) // Read 4 UV from 422, upsample to 8 UV. 
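// (illustrative sketch, not part of the patch) READYUV422 below reaches the
// V plane as u_buf + offset, with offset = v_buf - u_buf computed once per
// row, and its two unpacks duplicate each U,V pair across two Y samples.
// Scalar view, with a hypothetical helper:
#include <stddef.h>
#include <stdint.h>
static void Upsample422UV_Sketch(const uint8_t* u_buf, ptrdiff_t offset,
                                 uint8_t u8out[8], uint8_t v8out[8]) {
  for (int i = 0; i < 4; ++i) {
    u8out[2 * i] = u8out[2 * i + 1] = u_buf[i];
    v8out[2 * i] = v8out[2 * i + 1] = u_buf[i + offset];
  }
}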
-#define READYUV422 \ - xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ - xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ - xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ - xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ - u_buf += 4; \ - xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ - xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ +#define READYUV422 \ + xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \ + xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ + u_buf += 4; \ + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ y_buf += 8; // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. -#define READYUVA422 \ - xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ - xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ - xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ - xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ - u_buf += 4; \ - xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ - xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ - y_buf += 8; \ - xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ +#define READYUVA422 \ + xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \ + xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ + u_buf += 4; \ + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ + y_buf += 8; \ + xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ a_buf += 8; // Convert 8 pixels: 8 UV and 8 Y. @@ -84,15 +84,15 @@ extern "C" { dst_argb += 32; #if defined(HAS_I422TOARGBROW_SSSE3) -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void I422ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __m128i xmm0, xmm1, xmm2, xmm4; const __m128i xmm5 = _mm_set1_epi8(-1); - const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; + const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf; while (width > 0) { READYUV422 YUVTORGB(yuvconstants) @@ -103,15 +103,15 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, #endif #if defined(HAS_I422ALPHATOARGBROW_SSSE3) -void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __m128i xmm0, xmm1, xmm2, xmm4, xmm5; - const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; + const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf; while (width > 0) { READYUVA422 YUVTORGB(yuvconstants) @@ -255,8 +255,8 @@ static const lvec8 kShuffleNV21 = { }; // Duplicates gray value 3 times and fills in alpha opaque. -__declspec(naked) void J400ToARGBRow_SSE2(const uint8* src_y, - uint8* dst_argb, +__declspec(naked) void J400ToARGBRow_SSE2(const uint8_t* src_y, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_y @@ -285,8 +285,8 @@ __declspec(naked) void J400ToARGBRow_SSE2(const uint8* src_y, #ifdef HAS_J400TOARGBROW_AVX2 // Duplicates gray value 3 times and fills in alpha opaque. 
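J400ToARGB expands a gray plane by copying the luma into B, G and R and writing an opaque alpha, as the "Duplicates gray value 3 times" comments say. A scalar sketch of the row contract:

#include <stdint.h>

static void J400ToARGBRow_sketch(const uint8_t* src_y, uint8_t* dst_argb,
                                 int width) {
  for (int i = 0; i < width; ++i) {
    uint8_t y = src_y[i];
    dst_argb[0] = y;    // B
    dst_argb[1] = y;    // G
    dst_argb[2] = y;    // R
    dst_argb[3] = 255;  // A, opaque
    dst_argb += 4;
  }
}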
-__declspec(naked) void J400ToARGBRow_AVX2(const uint8* src_y, - uint8* dst_argb, +__declspec(naked) void J400ToARGBRow_AVX2(const uint8_t* src_y, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_y @@ -316,8 +316,8 @@ __declspec(naked) void J400ToARGBRow_AVX2(const uint8* src_y, } #endif // HAS_J400TOARGBROW_AVX2 -__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, - uint8* dst_argb, +__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_rgb24 @@ -355,8 +355,8 @@ __declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, } } -__declspec(naked) void RAWToARGBRow_SSSE3(const uint8* src_raw, - uint8* dst_argb, +__declspec(naked) void RAWToARGBRow_SSSE3(const uint8_t* src_raw, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_raw @@ -394,8 +394,8 @@ __declspec(naked) void RAWToARGBRow_SSSE3(const uint8* src_raw, } } -__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8* src_raw, - uint8* dst_rgb24, +__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, + uint8_t* dst_rgb24, int width) { __asm { mov eax, [esp + 4] // src_raw @@ -430,8 +430,8 @@ __declspec(naked) void RAWToRGB24Row_SSSE3(const uint8* src_raw, // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 // 20 instructions. -__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, - uint8* dst_argb, +__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565, + uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits @@ -486,8 +486,8 @@ __declspec(naked) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, // v * 256 + v * 8 // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 -__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, - uint8* dst_argb, +__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, + uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits @@ -537,8 +537,8 @@ __declspec(naked) void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, #endif // HAS_RGB565TOARGBROW_AVX2 #ifdef HAS_ARGB1555TOARGBROW_AVX2 -__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, - uint8* dst_argb, +__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits @@ -589,8 +589,8 @@ __declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, #endif // HAS_ARGB1555TOARGBROW_AVX2 #ifdef HAS_ARGB4444TOARGBROW_AVX2 -__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, - uint8* dst_argb, +__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { __asm { mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f @@ -627,8 +627,8 @@ __declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, #endif // HAS_ARGB4444TOARGBROW_AVX2 // 24 instructions -__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, - uint8* dst_argb, +__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits @@ -680,8 +680,8 @@ __declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, } // 18 instructions. 
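The 5-bit (and 6-bit) expansions in the RGB565/ARGB1555/ARGB4444 rows above and below rest on the v * (256 + 8) identity from the comments: the multiply replicates the top bits into the bottom, which is the multiply form of (v << 3) | (v >> 2). A scalar sketch for one RGB565 pixel (little-endian 16-bit layout assumed):

#include <stdint.h>

static void RGB565ToARGB_px(uint16_t p, uint8_t* dst /* B,G,R,A */) {
  uint32_t b5 = p & 0x1f, g6 = (p >> 5) & 0x3f, r5 = p >> 11;
  dst[0] = (uint8_t)((b5 << 3) | (b5 >> 2));  // same value as (b5 * 264) >> 5
  dst[1] = (uint8_t)((g6 << 2) | (g6 >> 4));  // 6-bit analogue
  dst[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
  dst[3] = 255;
}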
-__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, - uint8* dst_argb, +__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { __asm { mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f @@ -718,8 +718,8 @@ __declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, } } -__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -757,8 +757,8 @@ __declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8* src_argb, } } -__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -796,8 +796,8 @@ __declspec(naked) void ARGBToRAWRow_SSSE3(const uint8* src_argb, } } -__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -834,9 +834,9 @@ __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8* src_argb, } } -__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width) { __asm { @@ -881,9 +881,9 @@ __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, } #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 -__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, - uint8* dst_rgb, - const uint32 dither4, +__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -925,8 +925,8 @@ __declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, #endif // HAS_ARGBTORGB565DITHERROW_AVX2 // TODO(fbarchard): Improve sign extension/packing. 
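Going the other way, the RGB565 writers above truncate each channel to 5:6:5, and the Dither variants add a per-column byte with unsigned saturation first (the paddusb step). A scalar sketch, assuming dither4 packs four dither bytes, one per pixel column, little-endian:

#include <stdint.h>

static uint16_t ARGBToRGB565Dither_px(const uint8_t* p /* B,G,R,A */,
                                      uint32_t dither4, int x) {
  int d = (dither4 >> ((x & 3) * 8)) & 0xff;  // dither byte for this column
  int b = p[0] + d, g = p[1] + d, r = p[2] + d;
  if (b > 255) b = 255;  // unsigned saturation before truncation
  if (g > 255) g = 255;
  if (r > 255) r = 255;
  return (uint16_t)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
}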
-__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -967,8 +967,8 @@ __declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8* src_argb, } } -__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -998,8 +998,8 @@ __declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8* src_argb, } #ifdef HAS_ARGBTORGB565ROW_AVX2 -__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -1036,8 +1036,8 @@ __declspec(naked) void ARGBToRGB565Row_AVX2(const uint8* src_argb, #endif // HAS_ARGBTORGB565ROW_AVX2 #ifdef HAS_ARGBTOARGB1555ROW_AVX2 -__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -1077,8 +1077,8 @@ __declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8* src_argb, #endif // HAS_ARGBTOARGB1555ROW_AVX2 #ifdef HAS_ARGBTOARGB4444ROW_AVX2 -__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8* src_argb, - uint8* dst_rgb, +__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -1109,8 +1109,8 @@ __declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8* src_argb, #endif // HAS_ARGBTOARGB4444ROW_AVX2 // Convert 16 ARGB pixels (64 bytes) to 16 Y values. -__declspec(naked) void ARGBToYRow_SSSE3(const uint8* src_argb, - uint8* dst_y, +__declspec(naked) void ARGBToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1145,8 +1145,8 @@ __declspec(naked) void ARGBToYRow_SSSE3(const uint8* src_argb, // Convert 16 ARGB pixels (64 bytes) to 16 YJ values. // Same as ARGBToYRow but different coefficients, no add 16, but do rounding. -__declspec(naked) void ARGBToYJRow_SSSE3(const uint8* src_argb, - uint8* dst_y, +__declspec(naked) void ARGBToYJRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1185,8 +1185,8 @@ __declspec(naked) void ARGBToYJRow_SSSE3(const uint8* src_argb, static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7}; // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -__declspec(naked) void ARGBToYRow_AVX2(const uint8* src_argb, - uint8* dst_y, +__declspec(naked) void ARGBToYRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1225,8 +1225,8 @@ __declspec(naked) void ARGBToYRow_AVX2(const uint8* src_argb, #ifdef HAS_ARGBTOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. 
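ARGBToY and ARGBToYJ differ only in weights and bias: the J (full-range) variant uses the same coefficients quoted for the gray row earlier, while plain ARGBToY maps into the BT.601 studio range. The studio-range constants below are not in this hunk, so treat them as an assumption from libyuv's row_common.cc:

#include <stdint.h>

static uint8_t RGBToY_px(uint8_t r, uint8_t g, uint8_t b, int full_range) {
  if (full_range)  // YJ: no +16 bias, rounds with +64
    return (uint8_t)((38 * r + 75 * g + 15 * b + 64) >> 7);
  // BT.601: 0x1080 folds the +16 bias (16.5 * 256) into the rounding term.
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}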
-__declspec(naked) void ARGBToYJRow_AVX2(const uint8* src_argb, - uint8* dst_y, +__declspec(naked) void ARGBToYJRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1265,8 +1265,8 @@ __declspec(naked) void ARGBToYJRow_AVX2(const uint8* src_argb, } #endif // HAS_ARGBTOYJROW_AVX2 -__declspec(naked) void BGRAToYRow_SSSE3(const uint8* src_argb, - uint8* dst_y, +__declspec(naked) void BGRAToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1299,8 +1299,8 @@ __declspec(naked) void BGRAToYRow_SSSE3(const uint8* src_argb, } } -__declspec(naked) void ABGRToYRow_SSSE3(const uint8* src_argb, - uint8* dst_y, +__declspec(naked) void ABGRToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1333,8 +1333,8 @@ __declspec(naked) void ABGRToYRow_SSSE3(const uint8* src_argb, } } -__declspec(naked) void RGBAToYRow_SSSE3(const uint8* src_argb, - uint8* dst_y, +__declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -1367,10 +1367,10 @@ __declspec(naked) void RGBAToYRow_SSSE3(const uint8* src_argb, } } -__declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0, +__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -1439,10 +1439,10 @@ __declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0, } } -__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0, +__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -1513,10 +1513,10 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0, } #ifdef HAS_ARGBTOUVROW_AVX2 -__declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0, +__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -1581,10 +1581,10 @@ __declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0, #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJROW_AVX2 -__declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0, +__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -1649,9 +1649,9 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0, } #endif // HAS_ARGBTOUVJROW_AVX2 -__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8* src_argb0, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -1707,10 +1707,10 @@ __declspec(naked) void ARGBToUV444Row_SSSE3(const uint8* src_argb0, } } -__declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0, +__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -1779,10 +1779,10 @@ __declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0, } } -__declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0, +__declspec(naked) void ABGRToUVRow_SSSE3(const 
uint8* src_argb0,
+__declspec(naked) void ABGRToUVRow_SSSE3(const
uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -1851,10 +1851,10 @@ __declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0, } } -__declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0, +__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -2065,10 +2065,10 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0, // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). __declspec(naked) void I422ToARGBRow_AVX2( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2105,11 +2105,11 @@ __declspec(naked) void I422ToARGBRow_AVX2( // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. __declspec(naked) void I422AlphaToARGBRow_AVX2( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2148,10 +2148,10 @@ __declspec(naked) void I422AlphaToARGBRow_AVX2( // 16 pixels // 16 UV values with 16 Y producing 16 ARGB (64 bytes). __declspec(naked) void I444ToARGBRow_AVX2( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2187,9 +2187,9 @@ __declspec(naked) void I444ToARGBRow_AVX2( // 16 pixels. // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). __declspec(naked) void NV12ToARGBRow_AVX2( - const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2222,9 +2222,9 @@ __declspec(naked) void NV12ToARGBRow_AVX2( // 16 pixels. // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). __declspec(naked) void NV21ToARGBRow_AVX2( - const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2257,8 +2257,8 @@ __declspec(naked) void NV21ToARGBRow_AVX2( // 16 pixels. // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). __declspec(naked) void YUY2ToARGBRow_AVX2( - const uint8* src_yuy2, - uint8* dst_argb, + const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2288,8 +2288,8 @@ __declspec(naked) void YUY2ToARGBRow_AVX2( // 16 pixels. // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). __declspec(naked) void UYVYToARGBRow_AVX2( - const uint8* src_uyvy, - uint8* dst_argb, + const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2319,10 +2319,10 @@ __declspec(naked) void UYVYToARGBRow_AVX2( // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 
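In all the 422 paths above, 8 chroma samples feed 16 pixels: the unpack-with-self step duplicates each UV pair so that two horizontally adjacent pixels share one chroma sample (nearest sampling, no interpolation). In scalar form:

#include <stdint.h>

static void SampleUV422(const uint8_t* u_buf, const uint8_t* v_buf, int i,
                        uint8_t* u, uint8_t* v) {
  *u = u_buf[i >> 1];  // pixels i and i+1 share chroma sample i/2
  *v = v_buf[i >> 1];
}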
__declspec(naked) void I422ToRGBARow_AVX2( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2551,10 +2551,10 @@ __declspec(naked) void I422ToRGBARow_AVX2( // 8 pixels. // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) void I444ToARGBRow_SSSE3( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2588,10 +2588,10 @@ __declspec(naked) void I444ToARGBRow_SSSE3( // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). __declspec(naked) void I422ToRGB24Row_SSSE3( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb24, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2626,10 +2626,10 @@ __declspec(naked) void I422ToRGB24Row_SSSE3( // 8 pixels // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). __declspec(naked) void I422ToRGB565Row_SSSE3( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb565_buf, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* rgb565_buf, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2669,10 +2669,10 @@ __declspec(naked) void I422ToRGB565Row_SSSE3( // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) void I422ToARGBRow_SSSE3( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2706,11 +2706,11 @@ __declspec(naked) void I422ToARGBRow_SSSE3( // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB. __declspec(naked) void I422AlphaToARGBRow_SSSE3( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2746,9 +2746,9 @@ __declspec(naked) void I422AlphaToARGBRow_SSSE3( // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) void NV12ToARGBRow_SSSE3( - const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2778,9 +2778,9 @@ __declspec(naked) void NV12ToARGBRow_SSSE3( // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) void NV21ToARGBRow_SSSE3( - const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, + const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2810,8 +2810,8 @@ __declspec(naked) void NV21ToARGBRow_SSSE3( // 8 pixels. // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). 
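YUY2 and UYVY are packed 4:2:2 formats: one 32-bit macro-pixel carries two luma samples and one chroma pair, ordered Y0 U Y1 V for YUY2 and U Y0 V Y1 for UYVY (the U0Y0V0Y1 comment further down). A scalar sketch of the unpack:

#include <stdint.h>

static void UnpackYUY2(const uint8_t* m /* 4 bytes = 2 pixels */,
                       uint8_t y[2], uint8_t* u, uint8_t* v) {
  y[0] = m[0];  // Y0
  *u   = m[1];  // U, shared by both pixels
  y[1] = m[2];  // Y1
  *v   = m[3];  // V, shared by both pixels
}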
__declspec(naked) void YUY2ToARGBRow_SSSE3( - const uint8* src_yuy2, - uint8* dst_argb, + const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2838,8 +2838,8 @@ __declspec(naked) void YUY2ToARGBRow_SSSE3( // 8 pixels. // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes). __declspec(naked) void UYVYToARGBRow_SSSE3( - const uint8* src_uyvy, - uint8* dst_argb, + const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2864,10 +2864,10 @@ __declspec(naked) void UYVYToARGBRow_SSSE3( } __declspec(naked) void I422ToRGBARow_SSSE3( - const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { __asm { @@ -2900,8 +2900,8 @@ __declspec(naked) void I422ToRGBARow_SSSE3( #ifdef HAS_I400TOARGBROW_SSE2 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes). -__declspec(naked) void I400ToARGBRow_SSE2(const uint8* y_buf, - uint8* rgb_buf, +__declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf, + uint8_t* rgb_buf, int width) { __asm { mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) @@ -2947,8 +2947,8 @@ __declspec(naked) void I400ToARGBRow_SSE2(const uint8* y_buf, #ifdef HAS_I400TOARGBROW_AVX2 // 16 pixels of Y converted to 16 pixels of ARGB (64 bytes). // note: vpunpcklbw mutates and vpackuswb unmutates. -__declspec(naked) void I400ToARGBRow_AVX2(const uint8* y_buf, - uint8* rgb_buf, +__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf, + uint8_t* rgb_buf, int width) { __asm { mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) @@ -3000,8 +3000,8 @@ static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; // TODO(fbarchard): Replace lea with -16 offset. -__declspec(naked) void MirrorRow_SSSE3(const uint8* src, - uint8* dst, +__declspec(naked) void MirrorRow_SSSE3(const uint8_t* src, + uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3022,7 +3022,9 @@ __declspec(naked) void MirrorRow_SSSE3(const uint8* src, #endif // HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_AVX2 -__declspec(naked) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void MirrorRow_AVX2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst @@ -3048,9 +3050,9 @@ __declspec(naked) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; -__declspec(naked) void MirrorUVRow_SSSE3(const uint8* src, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void MirrorUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -3079,8 +3081,8 @@ __declspec(naked) void MirrorUVRow_SSSE3(const uint8* src, #endif // HAS_MIRRORUVROW_SSSE3 #ifdef HAS_ARGBMIRRORROW_SSE2 -__declspec(naked) void ARGBMirrorRow_SSE2(const uint8* src, - uint8* dst, +__declspec(naked) void ARGBMirrorRow_SSE2(const uint8_t* src, + uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3105,8 +3107,8 @@ __declspec(naked) void ARGBMirrorRow_SSE2(const uint8* src, // Shuffle table for reversing the bytes. 
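The shuffle tables here (kShuffleMirror, and the dword table kARGBShuffleMirror_AVX2 below) reverse a whole register in one shuffle; the scalar meaning is just:

#include <stdint.h>

static void MirrorRow_sketch(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i)
    dst[i] = src[width - 1 - i];  // one pshufb handles 16 of these at once
}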
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; -__declspec(naked) void ARGBMirrorRow_AVX2(const uint8* src, - uint8* dst, +__declspec(naked) void ARGBMirrorRow_AVX2(const uint8_t* src, + uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src @@ -3127,9 +3129,9 @@ __declspec(naked) void ARGBMirrorRow_AVX2(const uint8* src, #endif // HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_SPLITUVROW_SSE2 -__declspec(naked) void SplitUVRow_SSE2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void SplitUVRow_SSE2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -3167,9 +3169,9 @@ __declspec(naked) void SplitUVRow_SSE2(const uint8* src_uv, #endif // HAS_SPLITUVROW_SSE2 #ifdef HAS_SPLITUVROW_AVX2 -__declspec(naked) void SplitUVRow_AVX2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void SplitUVRow_AVX2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -3207,9 +3209,9 @@ __declspec(naked) void SplitUVRow_AVX2(const uint8* src_uv, #endif // HAS_SPLITUVROW_AVX2 #ifdef HAS_MERGEUVROW_SSE2 -__declspec(naked) void MergeUVRow_SSE2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +__declspec(naked) void MergeUVRow_SSE2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { __asm { push edi @@ -3239,9 +3241,9 @@ __declspec(naked) void MergeUVRow_SSE2(const uint8* src_u, #endif // HAS_MERGEUVROW_SSE2 #ifdef HAS_MERGEUVROW_AVX2 -__declspec(naked) void MergeUVRow_AVX2(const uint8* src_u, - const uint8* src_v, - uint8* dst_uv, +__declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { __asm { push edi @@ -3273,12 +3275,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8* src_u, #endif // HAS_MERGEUVROW_AVX2 #ifdef HAS_COPYROW_SSE2 -// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time. -__declspec(naked) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { +// CopyRow copys 'width' bytes using a 16 byte load/store, 32 bytes at time. +__declspec(naked) void CopyRow_SSE2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width test eax, 15 jne convertloopu test edx, 15 @@ -3310,12 +3314,14 @@ __declspec(naked) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { #endif // HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_AVX -// CopyRow copys 'count' bytes using a 32 byte load/store, 64 bytes at time. -__declspec(naked) void CopyRow_AVX(const uint8* src, uint8* dst, int count) { +// CopyRow copys 'width' bytes using a 32 byte load/store, 64 bytes at time. +__declspec(naked) void CopyRow_AVX(const uint8_t* src, + uint8_t* dst, + int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width convertloop: vmovdqu ymm0, [eax] @@ -3334,13 +3340,15 @@ __declspec(naked) void CopyRow_AVX(const uint8* src, uint8* dst, int count) { #endif // HAS_COPYROW_AVX // Multiple of 1. 
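The SplitUV/MergeUV rows above convert between interleaved UV (the NV12/NV21 chroma layout) and separate U and V planes; the SSE2/AVX2 bodies do it with pack/unpack, but the contract is simply:

#include <stdint.h>

static void SplitUVRow_sketch(const uint8_t* src_uv, uint8_t* dst_u,
                              uint8_t* dst_v, int width) {
  for (int i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0];  // MergeUV is the exact inverse
    dst_v[i] = src_uv[2 * i + 1];
  }
}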
-__declspec(naked) void CopyRow_ERMS(const uint8* src, uint8* dst, int count) { +__declspec(naked) void CopyRow_ERMS(const uint8_t* src, + uint8_t* dst, + int width) { __asm { mov eax, esi mov edx, edi mov esi, [esp + 4] // src mov edi, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width rep movsb mov edi, edx mov esi, eax @@ -3350,13 +3358,13 @@ __declspec(naked) void CopyRow_ERMS(const uint8* src, uint8* dst, int count) { #ifdef HAS_ARGBCOPYALPHAROW_SSE2 // width in pixels -__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8* src, - uint8* dst, +__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8_t* src, + uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width pcmpeqb xmm0, xmm0 // generate mask 0xff000000 pslld xmm0, 24 pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff @@ -3387,13 +3395,13 @@ __declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8* src, #ifdef HAS_ARGBCOPYALPHAROW_AVX2 // width in pixels -__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8* src, - uint8* dst, +__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8_t* src, + uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width vpcmpeqb ymm0, ymm0, ymm0 vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff @@ -3417,8 +3425,8 @@ __declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8* src, #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 // width in pixels -__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, - uint8* dst_a, +__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_a, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -3445,8 +3453,8 @@ __declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 // width in pixels -__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, - uint8* dst_a, +__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_a, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -3481,13 +3489,13 @@ __declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels -__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8* src, - uint8* dst, +__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, + uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width pcmpeqb xmm0, xmm0 // generate mask 0xff000000 pslld xmm0, 24 pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff @@ -3520,13 +3528,13 @@ __declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8* src, #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 // width in pixels -__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8* src, - uint8* dst, +__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, + uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width vpcmpeqb ymm0, ymm0, ymm0 vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff @@ -3551,16 +3559,16 @@ __declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8* src, #endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 #ifdef HAS_SETROW_X86 -// Write 'count' bytes using an 8 bit value repeated. -// Count should be multiple of 4. 
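SetRow_X86 below broadcasts the byte by multiplying with 0x01010101 and then stores whole dwords with rep stosd, which is why the width wants to be a multiple of 4. The replication step in C (4-byte alignment assumed for the sketch):

#include <stdint.h>

static void SetRow_sketch(uint8_t* dst, uint8_t v8, int width) {
  uint32_t v32 = v8 * 0x01010101u;  // duplicate the byte into all four lanes
  uint32_t* d = (uint32_t*)dst;
  for (int i = 0; i < width / 4; ++i)
    d[i] = v32;
}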
-__declspec(naked) void SetRow_X86(uint8* dst, uint8 v8, int count) { +// Write 'width' bytes using an 8 bit value repeated. +// width should be multiple of 4. +__declspec(naked) void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { __asm { movzx eax, byte ptr [esp + 8] // v8 mov edx, 0x01010101 // Duplicate byte to all bytes. mul edx // overwrites edx with upper part of result. mov edx, edi mov edi, [esp + 4] // dst - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width shr ecx, 2 rep stosd mov edi, edx @@ -3568,26 +3576,28 @@ __declspec(naked) void SetRow_X86(uint8* dst, uint8 v8, int count) { } } -// Write 'count' bytes using an 8 bit value repeated. -__declspec(naked) void SetRow_ERMS(uint8* dst, uint8 v8, int count) { +// Write 'width' bytes using an 8 bit value repeated. +__declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { __asm { mov edx, edi mov edi, [esp + 4] // dst mov eax, [esp + 8] // v8 - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width rep stosb mov edi, edx ret } } -// Write 'count' 32 bit values. -__declspec(naked) void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) { +// Write 'width' 32 bit values. +__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb, + uint32_t v32, + int width) { __asm { mov edx, edi mov edi, [esp + 4] // dst mov eax, [esp + 8] // v32 - mov ecx, [esp + 12] // count + mov ecx, [esp + 12] // width rep stosd mov edi, edx ret @@ -3596,8 +3606,8 @@ __declspec(naked) void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) { #endif // HAS_SETROW_X86 #ifdef HAS_YUY2TOYROW_AVX2 -__declspec(naked) void YUY2ToYRow_AVX2(const uint8* src_yuy2, - uint8* dst_y, +__declspec(naked) void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_yuy2 @@ -3623,10 +3633,10 @@ __declspec(naked) void YUY2ToYRow_AVX2(const uint8* src_yuy2, } } -__declspec(naked) void YUY2ToUVRow_AVX2(const uint8* src_yuy2, +__declspec(naked) void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -3669,9 +3679,9 @@ __declspec(naked) void YUY2ToUVRow_AVX2(const uint8* src_yuy2, } } -__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -3709,8 +3719,8 @@ __declspec(naked) void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, } } -__declspec(naked) void UYVYToYRow_AVX2(const uint8* src_uyvy, - uint8* dst_y, +__declspec(naked) void UYVYToYRow_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_uyvy @@ -3734,10 +3744,10 @@ __declspec(naked) void UYVYToYRow_AVX2(const uint8* src_uyvy, } } -__declspec(naked) void UYVYToUVRow_AVX2(const uint8* src_uyvy, +__declspec(naked) void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -3780,9 +3790,9 @@ __declspec(naked) void UYVYToUVRow_AVX2(const uint8* src_uyvy, } } -__declspec(naked) void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -3822,8 +3832,8 @@ __declspec(naked) void UYVYToUV422Row_AVX2(const uint8* src_uyvy, #endif // HAS_YUY2TOYROW_AVX2 #ifdef 
HAS_YUY2TOYROW_SSE2 -__declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2, - uint8* dst_y, +__declspec(naked) void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_yuy2 @@ -3847,10 +3857,10 @@ __declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2, } } -__declspec(naked) void YUY2ToUVRow_SSE2(const uint8* src_yuy2, +__declspec(naked) void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, int stride_yuy2, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -3892,9 +3902,9 @@ __declspec(naked) void YUY2ToUVRow_SSE2(const uint8* src_yuy2, } } -__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -3929,8 +3939,8 @@ __declspec(naked) void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, } } -__declspec(naked) void UYVYToYRow_SSE2(const uint8* src_uyvy, - uint8* dst_y, +__declspec(naked) void UYVYToYRow_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_uyvy @@ -3952,10 +3962,10 @@ __declspec(naked) void UYVYToYRow_SSE2(const uint8* src_uyvy, } } -__declspec(naked) void UYVYToUVRow_SSE2(const uint8* src_uyvy, +__declspec(naked) void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, int stride_uyvy, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push esi @@ -3997,9 +4007,9 @@ __declspec(naked) void UYVYToUVRow_SSE2(const uint8* src_uyvy, } } -__declspec(naked) void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, - uint8* dst_v, +__declspec(naked) void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width) { __asm { push edi @@ -4041,10 +4051,10 @@ __declspec(naked) void UYVYToUV422Row_SSE2(const uint8* src_uyvy, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -__declspec(naked) void BlendPlaneRow_SSSE3(const uint8* src0, - const uint8* src1, - const uint8* alpha, - uint8* dst, +__declspec(naked) void BlendPlaneRow_SSSE3(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, int width) { __asm { push esi @@ -4098,10 +4108,10 @@ __declspec(naked) void BlendPlaneRow_SSSE3(const uint8* src0, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -__declspec(naked) void BlendPlaneRow_AVX2(const uint8* src0, - const uint8* src1, - const uint8* alpha, - uint8* dst, +__declspec(naked) void BlendPlaneRow_AVX2(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, int width) { __asm { push esi @@ -4162,9 +4172,9 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; // Blend 8 pixels at a time. 
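The blend rows implement the unsigned formula quoted above, ((A2 * C2) + (B2 * (255 - C2)) + 255) / 256 with C2 the per-pixel alpha; the signed rewrite in the second comment is what lets a single pmaddubsw do both multiplies. Scalar sketch:

#include <stdint.h>

static uint8_t BlendPlane_px(uint8_t a, uint8_t b, uint8_t alpha) {
  // ((A2 * C2) + (B2 * (255 - C2)) + 255) / 256, per the comment above.
  return (uint8_t)((a * alpha + b * (255 - alpha) + 255) >> 8);
}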
-__declspec(naked) void ARGBBlendRow_SSSE3(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { __asm { push esi @@ -4253,8 +4263,8 @@ static const uvec8 kShuffleAlpha1 = { 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, }; -__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 @@ -4298,8 +4308,8 @@ __declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8* src_argb, static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u}; -__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 @@ -4336,8 +4346,8 @@ __declspec(naked) void ARGBAttenuateRow_AVX2(const uint8* src_argb, #ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. -__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { __asm { push ebx @@ -4392,8 +4402,8 @@ static const uvec8 kUnattenShuffleAlpha_AVX2 = { // TODO(fbarchard): Enable USE_GATHER for future hardware if faster. // USE_GATHER is not on by default, due to being a slow instruction. #ifdef USE_GATHER -__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 @@ -4426,8 +4436,8 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb, } } #else // USE_GATHER -__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { __asm { @@ -4495,8 +4505,8 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb, #ifdef HAS_ARGBGRAYROW_SSSE3 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels. -__declspec(naked) void ARGBGrayRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBGrayRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -4552,7 +4562,7 @@ static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0}; // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. -__declspec(naked) void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { +__declspec(naked) void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] /* dst_argb */ mov ecx, [esp + 8] /* width */ @@ -4608,9 +4618,9 @@ __declspec(naked) void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { // Same as Sepia except matrix is provided. // TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R // and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd. 
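The Attenuate rows above premultiply each color channel by its alpha, and Unattenuate divides it back out (via the reciprocal table or gather paths). Roughly, with the >>8 shortcut these SIMD paths use; the exact rounding differs per path, so this is only a sketch:

#include <stdint.h>

static void Attenuate_px(uint8_t* p /* B,G,R,A */) {
  int a = p[3];
  p[0] = (uint8_t)((p[0] * a) >> 8);  // premultiplied B
  p[1] = (uint8_t)((p[1] * a) >> 8);  // premultiplied G
  p[2] = (uint8_t)((p[2] * a) >> 8);  // premultiplied R
}

static void Unattenuate_px(uint8_t* p /* B,G,R,A */) {
  int a = p[3];
  if (a == 0) return;  // fully transparent stays as-is
  int b = p[0] * 255 / a, g = p[1] * 255 / a, r = p[2] * 255 / a;
  p[0] = (uint8_t)(b > 255 ? 255 : b);
  p[1] = (uint8_t)(g > 255 ? 255 : g);
  p[2] = (uint8_t)(r > 255 ? 255 : r);
}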
-__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, - const int8* matrix_argb, +__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, int width) { __asm { mov eax, [esp + 4] /* src_argb */ @@ -4670,7 +4680,7 @@ __declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, #ifdef HAS_ARGBQUANTIZEROW_SSE2 // Quantize 4 ARGB pixels (16 bytes). -__declspec(naked) void ARGBQuantizeRow_SSE2(uint8* dst_argb, +__declspec(naked) void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, @@ -4717,10 +4727,10 @@ __declspec(naked) void ARGBQuantizeRow_SSE2(uint8* dst_argb, #ifdef HAS_ARGBSHADEROW_SSE2 // Shade 4 pixels at a time by specified value. -__declspec(naked) void ARGBShadeRow_SSE2(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - uint32 value) { + uint32_t value) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_argb @@ -4752,9 +4762,9 @@ __declspec(naked) void ARGBShadeRow_SSE2(const uint8* src_argb, #ifdef HAS_ARGBMULTIPLYROW_SSE2 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time. -__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { __asm { push esi @@ -4792,9 +4802,9 @@ __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8* src_argb0, #ifdef HAS_ARGBADDROW_SSE2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. // TODO(fbarchard): Port this to posix, neon and other math functions. -__declspec(naked) void ARGBAddRow_SSE2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { __asm { push esi @@ -4841,9 +4851,9 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8* src_argb0, #ifdef HAS_ARGBSUBTRACTROW_SSE2 // Subtract 2 rows of ARGB pixels together, 4 pixels at a time. -__declspec(naked) void ARGBSubtractRow_SSE2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { __asm { push esi @@ -4871,9 +4881,9 @@ __declspec(naked) void ARGBSubtractRow_SSE2(const uint8* src_argb0, #ifdef HAS_ARGBMULTIPLYROW_AVX2 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { __asm { push esi @@ -4909,9 +4919,9 @@ __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8* src_argb0, #ifdef HAS_ARGBADDROW_AVX2 // Add 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) void ARGBAddRow_AVX2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { __asm { push esi @@ -4939,9 +4949,9 @@ __declspec(naked) void ARGBAddRow_AVX2(const uint8* src_argb0, #ifdef HAS_ARGBSUBTRACTROW_AVX2 // Subtract 2 rows of ARGB pixels together, 8 pixels at a time. 
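Multiply/Add/Subtract all work per channel: add and subtract saturate unsigned (paddusb/psubusb), and multiply treats the operands as fractions of 255, which the SIMD approximates in 8.8 fixed point. A sketch of the channel contracts (the multiply rounding here is approximate):

#include <stdint.h>

static uint8_t AddCh(uint8_t a, uint8_t b) {
  int s = a + b;
  return (uint8_t)(s > 255 ? 255 : s);  // unsigned saturating add
}
static uint8_t SubCh(uint8_t a, uint8_t b) {
  return (uint8_t)(a > b ? a - b : 0);  // unsigned saturating subtract
}
static uint8_t MulCh(uint8_t a, uint8_t b) {
  return (uint8_t)(a * b / 255);        // ~ pmulhuw on 8.8 operands
}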
-__declspec(naked) void ARGBSubtractRow_AVX2(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, +__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, int width) { __asm { push esi @@ -4972,10 +4982,10 @@ __declspec(naked) void ARGBSubtractRow_AVX2(const uint8* src_argb0, // -1 0 1 // -2 0 2 // -1 0 1 -__declspec(naked) void SobelXRow_SSE2(const uint8* src_y0, - const uint8* src_y1, - const uint8* src_y2, - uint8* dst_sobelx, +__declspec(naked) void SobelXRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, int width) { __asm { push esi @@ -5030,9 +5040,9 @@ __declspec(naked) void SobelXRow_SSE2(const uint8* src_y0, // -1 -2 -1 // 0 0 0 // 1 2 1 -__declspec(naked) void SobelYRow_SSE2(const uint8* src_y0, - const uint8* src_y1, - uint8* dst_sobely, +__declspec(naked) void SobelYRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, int width) { __asm { push esi @@ -5084,9 +5094,9 @@ __declspec(naked) void SobelYRow_SSE2(const uint8* src_y0, // R = Sobel // G = Sobel // B = Sobel -__declspec(naked) void SobelRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +__declspec(naked) void SobelRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { __asm { push esi @@ -5132,9 +5142,9 @@ __declspec(naked) void SobelRow_SSE2(const uint8* src_sobelx, #ifdef HAS_SOBELTOPLANEROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into a plane. -__declspec(naked) void SobelToPlaneRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_y, +__declspec(naked) void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width) { __asm { push esi @@ -5166,9 +5176,9 @@ __declspec(naked) void SobelToPlaneRow_SSE2(const uint8* src_sobelx, // R = Sobel X // G = Sobel // B = Sobel Y -__declspec(naked) void SobelXYRow_SSE2(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst_argb, +__declspec(naked) void SobelXYRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, int width) { __asm { push esi @@ -5225,11 +5235,11 @@ __declspec(naked) void SobelXYRow_SSE2(const uint8* src_sobelx, // count is number of averaged pixels to produce. // Does 4 pixels at a time. // This function requires alignment on accumulation buffer pointers. -void CumulativeSumToAverageRow_SSE2(const int32* topleft, - const int32* botleft, +void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, + const int32_t* botleft, int width, int area, - uint8* dst, + uint8_t* dst, int count) { __asm { mov eax, topleft // eax topleft @@ -5375,9 +5385,9 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 // Creates a table of cumulative sums where each value is a sum of all values // above and to the left of the value. -void ComputeCumulativeSumRow_SSE2(const uint8* row, - int32* cumsum, - const int32* previous_cumsum, +void ComputeCumulativeSumRow_SSE2(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, int width) { __asm { mov eax, row @@ -5460,9 +5470,9 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, #ifdef HAS_ARGBAFFINEROW_SSE2 // Copy ARGB pixels from source image with slope to a row of destination. 
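The SobelX/SobelY rows above apply the 3x3 kernels from the comments (SobelY needs only two source rows because the kernel's middle row is zero), take the absolute value, saturate to 255, and SobelRow then sums X and Y into a gray value. Scalar sketch for one output pixel:

#include <stdint.h>
#include <stdlib.h>

static uint8_t SobelX_px(const uint8_t* r0, const uint8_t* r1,
                         const uint8_t* r2, int i) {
  // Kernel -1 0 1 / -2 0 2 / -1 0 1: columns i and i + 2 around the pixel.
  int s = (r0[i] - r0[i + 2]) + 2 * (r1[i] - r1[i + 2]) + (r2[i] - r2[i + 2]);
  s = abs(s);
  return (uint8_t)(s > 255 ? 255 : s);
}

static uint8_t Sobel_px(uint8_t sx, uint8_t sy) {
  int s = sx + sy;  // SobelRow stores this to B, G and R, alpha = 255
  return (uint8_t)(s > 255 ? 255 : s);
}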
-__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, +__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, - uint8* dst_argb, + uint8_t* dst_argb, const float* uv_dudv, int width) { __asm { @@ -5546,8 +5556,8 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 32x2 -> 32x1 -__declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr, - const uint8* src_ptr, +__declspec(naked) void InterpolateRow_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -5623,8 +5633,8 @@ __declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr, // Bilinear filter 16x2 -> 16x1 // TODO(fbarchard): Consider allowing 256 using memcpy. -__declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, +__declspec(naked) void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -5705,9 +5715,9 @@ __declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr, } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -5732,9 +5742,9 @@ __declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb, } #ifdef HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, +__declspec(naked) void ARGBShuffleRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -5767,10 +5777,10 @@ __declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb, // UYVY - Macro-pixel = 2 image pixels // U0Y0V0Y1 -__declspec(naked) void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, +__declspec(naked) void I422ToYUY2Row_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, int width) { __asm { push esi @@ -5804,10 +5814,10 @@ __declspec(naked) void I422ToYUY2Row_SSE2(const uint8* src_y, } } -__declspec(naked) void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, +__declspec(naked) void I422ToUYVYRow_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, int width) { __asm { push esi @@ -5842,8 +5852,8 @@ __declspec(naked) void I422ToUYVYRow_SSE2(const uint8* src_y, } #ifdef HAS_ARGBPOLYNOMIALROW_SSE2 -__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width) { __asm { @@ -5901,8 +5911,8 @@ __declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb, #endif // HAS_ARGBPOLYNOMIALROW_SSE2 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2 -__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width) { __asm { @@ -5941,8 +5951,8 @@ __declspec(naked) void ARGBPolynomialRow_AVX2(const uint8* src_argb, #ifdef HAS_HALFFLOATROW_SSE2 static 
float kExpBias = 1.9259299444e-34f; -__declspec(naked) void HalfFloatRow_SSE2(const uint16* src, - uint16* dst, +__declspec(naked) void HalfFloatRow_SSE2(const uint16_t* src, + uint16_t* dst, float scale, int width) { __asm { @@ -5978,8 +5988,8 @@ __declspec(naked) void HalfFloatRow_SSE2(const uint16* src, #endif // HAS_HALFFLOATROW_SSE2 #ifdef HAS_HALFFLOATROW_AVX2 -__declspec(naked) void HalfFloatRow_AVX2(const uint16* src, - uint16* dst, +__declspec(naked) void HalfFloatRow_AVX2(const uint16_t* src, + uint16_t* dst, float scale, int width) { __asm { @@ -6016,8 +6026,8 @@ __declspec(naked) void HalfFloatRow_AVX2(const uint16* src, #endif // HAS_HALFFLOATROW_AVX2 #ifdef HAS_HALFFLOATROW_F16C -__declspec(naked) void HalfFloatRow_F16C(const uint16* src, - uint16* dst, +__declspec(naked) void HalfFloatRow_F16C(const uint16_t* src, + uint16_t* dst, float scale, int width) { __asm { @@ -6050,8 +6060,8 @@ __declspec(naked) void HalfFloatRow_F16C(const uint16* src, #ifdef HAS_ARGBCOLORTABLEROW_X86 // Tranform ARGB pixels with color table. -__declspec(naked) void ARGBColorTableRow_X86(uint8* dst_argb, - const uint8* table_argb, +__declspec(naked) void ARGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, int width) { __asm { push esi @@ -6084,8 +6094,8 @@ __declspec(naked) void ARGBColorTableRow_X86(uint8* dst_argb, #ifdef HAS_RGBCOLORTABLEROW_X86 // Tranform RGB pixels with color table. -__declspec(naked) void RGBColorTableRow_X86(uint8* dst_argb, - const uint8* table_argb, +__declspec(naked) void RGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, int width) { __asm { push esi @@ -6116,11 +6126,11 @@ __declspec(naked) void RGBColorTableRow_X86(uint8* dst_argb, #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Tranform RGB pixels with luma table. -__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, - uint8* dst_argb, +__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, - uint32 lumacoeff) { + const uint8_t* luma, + uint32_t lumacoeff) { __asm { push esi push edi diff --git a/chromium/third_party/libyuv/source/scale.cc b/chromium/third_party/libyuv/source/scale.cc index 6951d8fb95e..2cfa1c6cb1c 100644 --- a/chromium/third_party/libyuv/source/scale.cc +++ b/chromium/third_party/libyuv/source/scale.cc @@ -39,12 +39,12 @@ static void ScalePlaneDown2(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = + void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width) = filtering == kFilterNone ? ScaleRowDown2_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_C @@ -136,12 +136,12 @@ static void ScalePlaneDown2_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width) = + void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width) = filtering == kFilterNone ? ScaleRowDown2_16_C : (filtering == kFilterLinear ? 
ScaleRowDown2Linear_16_C @@ -191,12 +191,12 @@ static void ScalePlaneDown4(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = + void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; int row_stride = src_stride << 2; (void)src_width; @@ -258,12 +258,12 @@ static void ScalePlaneDown4_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width) = + void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; int row_stride = src_stride << 2; (void)src_width; @@ -302,14 +302,14 @@ static void ScalePlaneDown34(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); + void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; (void)src_width; (void)src_height; @@ -411,14 +411,14 @@ static void ScalePlaneDown34_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); - void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); + void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); + void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; (void)src_width; (void)src_height; @@ -497,14 +497,14 @@ static void ScalePlaneDown38(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); + void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; assert(dst_width % 3 == 0); (void)src_width; @@ -606,14 +606,14 @@ static void ScalePlaneDown38_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); - void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); + void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); + void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; (void)src_width; (void)src_height; @@ -673,8 +673,8 @@ static void ScalePlaneDown38_16(int src_width, #define MIN1(x) ((x) < 1 ? 1 : (x)) -static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { - uint32 sum = 0u; +static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) { + uint32_t sum = 0u; int x; assert(iboxwidth > 0); for (x = 0; x < iboxwidth; ++x) { @@ -683,8 +683,8 @@ static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { return sum; } -static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { - uint32 sum = 0u; +static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) { + uint32_t sum = 0u; int x; assert(iboxwidth > 0); for (x = 0; x < iboxwidth; ++x) { @@ -697,8 +697,8 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, - uint8* dst_ptr) { + const uint16_t* src_ptr, + uint8_t* dst_ptr) { int i; int scaletbl[2]; int minboxwidth = dx >> 16; @@ -719,8 +719,8 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, - const uint32* src_ptr, - uint16* dst_ptr) { + const uint32_t* src_ptr, + uint16_t* dst_ptr) { int i; int scaletbl[2]; int minboxwidth = dx >> 16; @@ -741,8 +741,8 @@ static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, - uint8* dst_ptr) { + const uint16_t* src_ptr, + uint8_t* dst_ptr) { int scaleval = 65536 / boxheight; int i; (void)dx; @@ -756,8 +756,8 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, - uint8* dst_ptr) { + const uint16_t* src_ptr, + uint8_t* dst_ptr) { int boxwidth = MIN1(dx >> 16); int scaleval = 65536 / (boxwidth * boxheight); int i; @@ -772,8 +772,8 @@ static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, - const uint32* src_ptr, - uint16* dst_ptr) { + const uint32_t* src_ptr, + uint16_t* dst_ptr) { int boxwidth = MIN1(dx >> 16); int scaleval = 65536 / (boxwidth * boxheight); int i; @@ -796,8 +796,8 @@ static void ScalePlaneBox(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr) { + const uint8_t* src_ptr, + uint8_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -809,14 +809,14 @@ static void ScalePlaneBox(int src_width, &dx, &dy); src_width = Abs(src_width); { - // Allocate a row buffer of uint16. + // Allocate a row buffer of uint16_t. align_buffer_64(row16, src_width * 2); void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, uint8* dst_ptr) = + const uint16_t* src_ptr, uint8_t* dst_ptr) = (dx & 0xffff) ? 
ScaleAddCols2_C : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C); - void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) = - ScaleAddRow_C; + void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr, + int src_width) = ScaleAddRow_C; #if defined(HAS_SCALEADDROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleAddRow = ScaleAddRow_Any_SSE2; @@ -853,7 +853,7 @@ static void ScalePlaneBox(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint8* src = src_ptr + iy * src_stride; + const uint8_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; @@ -861,10 +861,10 @@ static void ScalePlaneBox(int src_width, boxheight = MIN1((y >> 16) - iy); memset(row16, 0, src_width * 2); for (k = 0; k < boxheight; ++k) { - ScaleAddRow(src, (uint16*)(row16), src_width); + ScaleAddRow(src, (uint16_t*)(row16), src_width); src += src_stride; } - ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr); + ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr); dst_ptr += dst_stride; } free_aligned_buffer_64(row16); @@ -877,8 +877,8 @@ static void ScalePlaneBox_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr) { + const uint16_t* src_ptr, + uint16_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -890,13 +890,13 @@ static void ScalePlaneBox_16(int src_width, &dx, &dy); src_width = Abs(src_width); { - // Allocate a row buffer of uint32. + // Allocate a row buffer of uint32_t. align_buffer_64(row32, src_width * 4); void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, - const uint32* src_ptr, uint16* dst_ptr) = + const uint32_t* src_ptr, uint16_t* dst_ptr) = (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C; - void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) = - ScaleAddRow_16_C; + void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr, + int src_width) = ScaleAddRow_16_C; #if defined(HAS_SCALEADDROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) { @@ -907,7 +907,7 @@ static void ScalePlaneBox_16(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint16* src = src_ptr + iy * src_stride; + const uint16_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; @@ -915,10 +915,10 @@ static void ScalePlaneBox_16(int src_width, boxheight = MIN1((y >> 16) - iy); memset(row32, 0, src_width * 4); for (k = 0; k < boxheight; ++k) { - ScaleAddRow(src, (uint32*)(row32), src_width); + ScaleAddRow(src, (uint32_t*)(row32), src_width); src += src_stride; } - ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr); + ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr); dst_ptr += dst_stride; } free_aligned_buffer_64(row32); @@ -932,8 +932,8 @@ void ScalePlaneBilinearDown(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. 
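// A minimal illustrative sketch (not from the diff) of the 16.16 walk these
// scalers share: dx is the source step per destination pixel, the integer
// part of x indexes the source row, and the low 16 bits carry the sub-pixel
// phase. The helper name is hypothetical; ScaleSlope() also centers x in
// the real code.
static void FixedPoint1616Sketch(const uint8_t* src, uint8_t* dst,
                                 int src_width, int dst_width) {
  int dx = (int)(((int64_t)src_width << 16) / dst_width);  // step, 16.16
  int x = 0;  // 0 keeps the sketch simple
  int j;
  for (j = 0; j < dst_width; ++j) {
    dst[j] = src[x >> 16];  // integer part picks the source pixel
    x += dx;                // fraction accumulates toward the next pixel
  }
}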
int x = 0; @@ -946,10 +946,10 @@ void ScalePlaneBilinearDown(int src_width, const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, - int x, int dx) = + void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, + int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1016,7 +1016,7 @@ void ScalePlaneBilinearDown(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -1039,8 +1039,8 @@ void ScalePlaneBilinearDown_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1053,10 +1053,10 @@ void ScalePlaneBilinearDown_16(int src_width, const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr, + void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; - void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr, + void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1107,13 +1107,13 @@ void ScalePlaneBilinearDown_16(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint16* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { int yf = (y >> 8) & 255; - InterpolateRow((uint16*)row, src, src_stride, src_width, yf); - ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx); + InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf); + ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx); } dst_ptr += dst_stride; y += dy; @@ -1131,8 +1131,8 @@ void ScalePlaneBilinearUp(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. @@ -1141,11 +1141,11 @@ void ScalePlaneBilinearUp(int src_width, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, - int x, int dx) = + void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, + int dst_width, int x, int dx) = filtering ? 
ScaleFilterCols_C : ScaleCols_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); @@ -1214,13 +1214,13 @@ void ScalePlaneBilinearUp(int src_width, } { int yi = y >> 16; - const uint8* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 2); - uint8* rowptr = row; + uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; @@ -1266,8 +1266,8 @@ void ScalePlaneBilinearUp_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. @@ -1276,10 +1276,10 @@ void ScalePlaneBilinearUp_16(int src_width, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr, + void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; - void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr, + void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1341,13 +1341,13 @@ void ScalePlaneBilinearUp_16(int src_width, } { int yi = y >> 16; - const uint16* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 4); - uint16* rowptr = (uint16*)row; + uint16_t* rowptr = (uint16_t*)row; int rowstride = kRowSize; int lasty = yi; @@ -1398,11 +1398,11 @@ static void ScalePlaneSimple(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, - uint8* dst_ptr) { + const uint8_t* src_ptr, + uint8_t* dst_ptr) { int i; - void (*ScaleCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, int x, - int dx) = ScaleCols_C; + void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width, + int x, int dx) = ScaleCols_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1434,10 +1434,10 @@ static void ScalePlaneSimple_16(int src_width, int dst_height, int src_stride, int dst_stride, - const uint16* src_ptr, - uint16* dst_ptr) { + const uint16_t* src_ptr, + uint16_t* dst_ptr) { int i; - void (*ScaleCols)(uint16 * dst_ptr, const uint16* src_ptr, int dst_width, + void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = ScaleCols_16_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1468,11 +1468,11 @@ static void ScalePlaneSimple_16(int src_width, // This function dispatches to a specialized scaler based on scale factor. 
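// Rough shape of that dispatch (an illustrative paraphrase, not the exact
// conditions in ScalePlane below):
//   if (same size and no clip)               -> plain row copy
//   else if (dst_width == src_width / 2)     -> ScalePlaneDown2
//   else if (dst_width == src_width / 4)     -> ScalePlaneDown4
//   else if (4 * dst_width == 3 * src_width) -> ScalePlaneDown34
//   else if (8 * dst_width == 3 * src_width) -> ScalePlaneDown38
//   else if (filtering == kFilterBox)        -> ScalePlaneBox
//   else if (filtering)                      -> ScalePlaneBilinearUp / Down
//   else                                     -> ScalePlaneSimple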
LIBYUV_API -void ScalePlane(const uint8* src, +void ScalePlane(const uint8_t* src, int src_stride, int src_width, int src_height, - uint8* dst, + uint8_t* dst, int dst_stride, int dst_width, int dst_height, @@ -1551,11 +1551,11 @@ void ScalePlane(const uint8* src, } LIBYUV_API -void ScalePlane_16(const uint16* src, +void ScalePlane_16(const uint16_t* src, int src_stride, int src_width, int src_height, - uint16* dst, + uint16_t* dst, int dst_stride, int dst_width, int dst_height, @@ -1637,19 +1637,19 @@ void ScalePlane_16(const uint16* src, // This function in turn calls a scaling function for each plane. LIBYUV_API -int I420Scale(const uint8* src_y, +int I420Scale(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, int src_width, int src_height, - uint8* dst_y, + uint8_t* dst_y, int dst_stride_y, - uint8* dst_u, + uint8_t* dst_u, int dst_stride_u, - uint8* dst_v, + uint8_t* dst_v, int dst_stride_v, int dst_width, int dst_height, @@ -1674,19 +1674,19 @@ int I420Scale(const uint8* src_y, } LIBYUV_API -int I420Scale_16(const uint16* src_y, +int I420Scale_16(const uint16_t* src_y, int src_stride_y, - const uint16* src_u, + const uint16_t* src_u, int src_stride_u, - const uint16* src_v, + const uint16_t* src_v, int src_stride_v, int src_width, int src_height, - uint16* dst_y, + uint16_t* dst_y, int dst_stride_y, - uint16* dst_u, + uint16_t* dst_u, int dst_stride_u, - uint16* dst_v, + uint16_t* dst_v, int dst_stride_v, int dst_width, int dst_height, @@ -1712,17 +1712,17 @@ int I420Scale_16(const uint16* src_y, // Deprecated api LIBYUV_API -int Scale(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, +int Scale(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, int src_stride_y, int src_stride_u, int src_stride_v, int src_width, int src_height, - uint8* dst_y, - uint8* dst_u, - uint8* dst_v, + uint8_t* dst_y, + uint8_t* dst_u, + uint8_t* dst_v, int dst_stride_y, int dst_stride_u, int dst_stride_v, @@ -1735,43 +1735,6 @@ int Scale(const uint8* src_y, dst_height, interpolate ? kFilterBox : kFilterNone); } -// Deprecated api -LIBYUV_API -int ScaleOffset(const uint8* src, - int src_width, - int src_height, - uint8* dst, - int dst_width, - int dst_height, - int dst_yoffset, - LIBYUV_BOOL interpolate) { - // Chroma requires offset to multiple of 2. 
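// Why the even offset matters (illustrative note): I420 chroma is
// subsampled 2x2, so one chroma row covers two luma rows. Rounding the luma
// offset down to even keeps the chroma offset integral:
//   dst_yoffset & ~1  ->  5 becomes 4 luma rows  ->  4 >> 1 = 2 chroma rows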
- int dst_yoffset_even = dst_yoffset & ~1; - int src_halfwidth = SUBSAMPLE(src_width, 1, 1); - int src_halfheight = SUBSAMPLE(src_height, 1, 1); - int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); - int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); - int aheight = dst_height - dst_yoffset_even * 2; // actual output height - const uint8* src_y = src; - const uint8* src_u = src + src_width * src_height; - const uint8* src_v = - src + src_width * src_height + src_halfwidth * src_halfheight; - uint8* dst_y = dst + dst_yoffset_even * dst_width; - uint8* dst_u = - dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth; - uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight + - (dst_yoffset_even >> 1) * dst_halfwidth; - if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 || - dst_height <= 0 || dst_yoffset_even < 0 || - dst_yoffset_even >= dst_height) { - return -1; - } - return I420Scale(src_y, src_width, src_u, src_halfwidth, src_v, src_halfwidth, - src_width, src_height, dst_y, dst_width, dst_u, - dst_halfwidth, dst_v, dst_halfwidth, dst_width, aheight, - interpolate ? kFilterBox : kFilterNone); -} - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/chromium/third_party/libyuv/source/scale_any.cc b/chromium/third_party/libyuv/source/scale_any.cc index 8604c233859..53ad1364049 100644 --- a/chromium/third_party/libyuv/source/scale_any.cc +++ b/chromium/third_party/libyuv/source/scale_any.cc @@ -19,15 +19,15 @@ extern "C" { #endif // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols -#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ - void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, \ - int dx) { \ - int r = dst_width & MASK; \ - int n = dst_width & ~MASK; \ - if (n > 0) { \ - TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ - } \ - TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ +#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ + void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ + int dx) { \ + int r = dst_width & MASK; \ + int n = dst_width & ~MASK; \ + if (n > 0) { \ + TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ + } \ + TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ } #ifdef HAS_SCALEFILTERCOLS_NEON @@ -60,31 +60,31 @@ CANY(ScaleARGBFilterCols_Any_MSA, // Fixed scale down. // Mask may be non-power of 2, so use MOD -#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \ - int dst_width) { \ - int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ - int n = dst_width - r; \ - if (n > 0) { \ - SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ - } \ - SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ - dst_ptr + n * BPP, r); \ +#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ + int dst_width) { \ + int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ + int n = dst_width - r; \ + if (n > 0) { \ + SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ + } \ + SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ + dst_ptr + n * BPP, r); \ } // Fixed scale down for odd source width. Used by I420Blend subsampling. // Since dst_width is (width + 1) / 2, this function scales one less pixel // and copies the last pixel. 
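// An illustrative expansion (not from the diff) of one SDODD instance,
// assuming a 2x box scaler with FACTOR 2, BPP 1, MASK 15; the actual
// instantiations live elsewhere in scale_any.cc:
//   void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* src_ptr,
//                                   ptrdiff_t src_stride,
//                                   uint8_t* dst_ptr, int dst_width) {
//     int r = (int)((unsigned int)(dst_width - 1) % 16);
//     int n = (dst_width - 1) - r;       // SIMD handles the even body
//     if (n > 0) {
//       ScaleRowDown2Box_SSSE3(src_ptr, src_stride, dst_ptr, n);
//     }
//     ScaleRowDown2Box_Odd_C(src_ptr + n * 2, src_stride,  // C tail does
//                            dst_ptr + n, r + 1);          // r + 1 pixels
//   }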
-#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \ - int dst_width) { \ - int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ - int n = (dst_width - 1) - r; \ - if (n > 0) { \ - SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ - } \ - SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ - dst_ptr + n * BPP, r + 1); \ +#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ + int dst_width) { \ + int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ + int n = (dst_width - 1) - r; \ + if (n > 0) { \ + SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ + } \ + SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ + dst_ptr + n * BPP, r + 1); \ } #ifdef HAS_SCALEROWDOWN2_SSSE3 @@ -385,16 +385,16 @@ SDANY(ScaleARGBRowDown2Box_Any_MSA, #undef SDANY // Scale down by even scale factor. -#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \ - uint8* dst_ptr, int dst_width) { \ - int r = dst_width & MASK; \ - int n = dst_width & ~MASK; \ - if (n > 0) { \ - SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ - } \ - SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ - dst_ptr + n * BPP, r); \ +#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \ + uint8_t* dst_ptr, int dst_width) { \ + int r = dst_width & MASK; \ + int n = dst_width & ~MASK; \ + if (n > 0) { \ + SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ + } \ + SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ + dst_ptr + n * BPP, r); \ } #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 @@ -435,13 +435,13 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, #endif // Add rows box filter scale down. 
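// An illustrative expansion (not from the diff) of one SAANY instance,
// assuming a 16-pixel SSE2 kernel (MASK 15):
//   void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr,
//                             int src_width) {
//     int n = src_width & ~15;           // multiple-of-16 body
//     if (n > 0) {
//       ScaleAddRow_SSE2(src_ptr, dst_ptr, n);
//     }
//     ScaleAddRow_C(src_ptr + n, dst_ptr + n, src_width & 15);  // tail
//   }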
-#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ - void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \ - int n = src_width & ~MASK; \ - if (n > 0) { \ - SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ - } \ - SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ +#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ + int n = src_width & ~MASK; \ + if (n > 0) { \ + SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ + } \ + SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ } #ifdef HAS_SCALEADDROW_SSE2 diff --git a/chromium/third_party/libyuv/source/scale_argb.cc b/chromium/third_party/libyuv/source/scale_argb.cc index cd4683b37be..53a22e8b41e 100644 --- a/chromium/third_party/libyuv/source/scale_argb.cc +++ b/chromium/third_party/libyuv/source/scale_argb.cc @@ -36,8 +36,8 @@ static void ScaleARGBDown2(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int dx, int y, @@ -45,8 +45,8 @@ static void ScaleARGBDown2(int src_width, enum FilterMode filtering) { int j; int row_stride = src_stride * (dy >> 16); - void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) = + void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, int dst_width) = filtering == kFilterNone ? ScaleARGBRowDown2_C : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C @@ -131,8 +131,8 @@ static void ScaleARGBDown4Box(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int dx, int y, @@ -142,8 +142,8 @@ static void ScaleARGBDown4Box(int src_width, const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); int row_stride = src_stride * (dy >> 16); - void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) = + void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; // Advance to odd row, even column. src_argb += (y >> 16) * src_stride + (x >> 16) * 4; @@ -189,8 +189,8 @@ static void ScaleARGBDownEven(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int dx, int y, @@ -199,8 +199,8 @@ static void ScaleARGBDownEven(int src_width, int j; int col_step = dx >> 16; int row_stride = (dy >> 16) * src_stride; - void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, - int src_step, uint8* dst_argb, int dst_width) = + void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, + int src_step, uint8_t* dst_argb, int dst_width) = filtering ? 
ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; (void)src_width; (void)src_height; @@ -255,23 +255,23 @@ static void ScaleARGBBilinearDown(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int dx, int y, int dy, enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, + void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; - int64 xlast = x + (int64)(dst_width - 1) * dx; - int64 xl = (dx >= 0) ? x : xlast; - int64 xr = (dx >= 0) ? xlast : x; + int64_t xlast = x + (int64_t)(dst_width - 1) * dx; + int64_t xl = (dx >= 0) ? x : xlast; + int64_t xr = (dx >= 0) ? xlast : x; int clip_src_width; xl = (xl >> 16) & ~3; // Left edge aligned. xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. @@ -346,7 +346,7 @@ static void ScaleARGBBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { @@ -371,18 +371,18 @@ static void ScaleARGBBilinearUp(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int dx, int y, int dy, enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, + void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; const int max_y = (src_height - 1) << 16; @@ -479,13 +479,13 @@ static void ScaleARGBBilinearUp(int src_width, { int yi = y >> 16; - const uint8* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * src_stride; // Allocate 2 rows of ARGB. 
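// Illustrative note on the buffer below: the bilinear up-scalers keep two
// horizontally pre-scaled rows and ping-pong rowptr between the halves of
// the buffer, so when y advances by a single source row only one new row is
// filtered and the other is reused; lasty tracks which row pair is cached.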
const int kRowSize = (dst_width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); - uint8* rowptr = row; + uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; @@ -535,18 +535,18 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_stride_u, int src_stride_v, int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, int x, int dx, int y, int dy, enum FilterMode filtering) { int j; - void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, int width) = + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, int width) = I422ToARGBRow_C; #if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -581,7 +581,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } #endif - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; #if defined(HAS_INTERPOLATEROW_SSSE3) @@ -617,7 +617,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } #endif - void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, + void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; if (src_width >= 32768) { @@ -682,9 +682,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. int yi = y >> 16; int uv_yi = yi >> kYShift; - const uint8* src_row_y = src_y + yi * src_stride_y; - const uint8* src_row_u = src_u + uv_yi * src_stride_u; - const uint8* src_row_v = src_v + uv_yi * src_stride_v; + const uint8_t* src_row_y = src_y + yi * src_stride_y; + const uint8_t* src_row_u = src_u + uv_yi * src_stride_u; + const uint8_t* src_row_v = src_v + uv_yi * src_stride_v; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 4 + 31) & ~31; @@ -693,7 +693,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, // Allocate 1 row of ARGB for source conversion. align_buffer_64(argb_row, src_width * 4); - uint8* rowptr = row; + uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; @@ -765,15 +765,15 @@ static void ScaleARGBSimple(int src_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int dx, int y, int dy) { int j; - void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width, - int x, int dx) = + void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb, + int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; (void)src_height; #if defined(HAS_SCALEARGBCOLS_SSE2) @@ -817,11 +817,11 @@ static void ScaleARGBSimple(int src_width, // ScaleARGB a ARGB. // This function in turn calls a scaling function // suitable for handling the desired resolutions. 
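// Illustrative call (not from the diff) of the public wrapper defined
// further below; buffers and sizes are hypothetical, strides in bytes:
//   ARGBScale(src_argb, src_width * 4, src_width, src_height,
//             dst_argb, dst_width * 4, dst_width, dst_height,
//             kFilterBilinear);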
-static void ScaleARGB(const uint8* src, +static void ScaleARGB(const uint8_t* src, int src_stride, int src_width, int src_height, - uint8* dst, + uint8_t* dst, int dst_stride, int dst_width, int dst_height, @@ -850,13 +850,13 @@ static void ScaleARGB(const uint8* src, &dx, &dy); src_width = Abs(src_width); if (clip_x) { - int64 clipf = (int64)(clip_x)*dx; + int64_t clipf = (int64_t)(clip_x)*dx; x += (clipf & 0xffff); src += (clipf >> 16) * 4; dst += clip_x * 4; } if (clip_y) { - int64 clipf = (int64)(clip_y)*dy; + int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); src += (clipf >> 16) * src_stride; dst += clip_y * dst_stride; @@ -922,11 +922,11 @@ static void ScaleARGB(const uint8* src, } LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, +int ARGBScaleClip(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, @@ -950,11 +950,11 @@ int ARGBScaleClip(const uint8* src_argb, // Scale an ARGB image. LIBYUV_API -int ARGBScale(const uint8* src_argb, +int ARGBScale(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, @@ -971,18 +971,18 @@ int ARGBScale(const uint8* src_argb, // Scale with YUV conversion to ARGB and clipping. LIBYUV_API -int YUVToARGBScaleClip(const uint8* src_y, +int YUVToARGBScaleClip(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint32 src_fourcc, + uint32_t src_fourcc, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - uint32 dst_fourcc, + uint32_t dst_fourcc, int dst_width, int dst_height, int clip_x, @@ -990,7 +990,7 @@ int YUVToARGBScaleClip(const uint8* src_y, int clip_width, int clip_height, enum FilterMode filtering) { - uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4); + uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); int r; (void)src_fourcc; // TODO(fbarchard): implement and/or assert. 
(void)dst_fourcc; diff --git a/chromium/third_party/libyuv/source/scale_common.cc b/chromium/third_party/libyuv/source/scale_common.cc index e060c3cb8d9..b28d7da41fc 100644 --- a/chromium/third_party/libyuv/source/scale_common.cc +++ b/chromium/third_party/libyuv/source/scale_common.cc @@ -28,9 +28,9 @@ static __inline int Abs(int v) { } // CPU agnostic row functions -void ScaleRowDown2_C(const uint8* src_ptr, +void ScaleRowDown2_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { int x; (void)src_stride; @@ -45,9 +45,9 @@ void ScaleRowDown2_C(const uint8* src_ptr, } } -void ScaleRowDown2_16_C(const uint16* src_ptr, +void ScaleRowDown2_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { int x; (void)src_stride; @@ -62,11 +62,11 @@ void ScaleRowDown2_16_C(const uint16* src_ptr, } } -void ScaleRowDown2Linear_C(const uint8* src_ptr, +void ScaleRowDown2Linear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { - const uint8* s = src_ptr; + const uint8_t* s = src_ptr; int x; (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { @@ -80,11 +80,11 @@ void ScaleRowDown2Linear_C(const uint8* src_ptr, } } -void ScaleRowDown2Linear_16_C(const uint16* src_ptr, +void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { - const uint16* s = src_ptr; + const uint16_t* s = src_ptr; int x; (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { @@ -98,12 +98,12 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, } } -void ScaleRowDown2Box_C(const uint8* src_ptr, +void ScaleRowDown2Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; @@ -117,12 +117,12 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, } } -void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, +void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; dst_width -= 1; for (x = 0; x < dst_width - 1; x += 2) { @@ -141,12 +141,12 @@ void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, dst[0] = (s[0] + t[0] + 1) >> 1; } -void ScaleRowDown2Box_16_C(const uint16* src_ptr, +void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { - const uint16* s = src_ptr; - const uint16* t = src_ptr + src_stride; + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; @@ -160,9 +160,9 @@ void ScaleRowDown2Box_16_C(const uint16* src_ptr, } } -void ScaleRowDown4_C(const uint8* src_ptr, +void ScaleRowDown4_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { int x; (void)src_stride; @@ -177,9 +177,9 @@ void ScaleRowDown4_C(const uint8* src_ptr, } } -void ScaleRowDown4_16_C(const uint16* src_ptr, +void ScaleRowDown4_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { int x; (void)src_stride; @@ -194,9 +194,9 @@ 
void ScaleRowDown4_16_C(const uint16* src_ptr, } } -void ScaleRowDown4Box_C(const uint8* src_ptr, +void ScaleRowDown4Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { intptr_t stride = src_stride; int x; @@ -232,9 +232,9 @@ void ScaleRowDown4Box_C(const uint8* src_ptr, } } -void ScaleRowDown4Box_16_C(const uint16* src_ptr, +void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { intptr_t stride = src_stride; int x; @@ -270,9 +270,9 @@ void ScaleRowDown4Box_16_C(const uint16* src_ptr, } } -void ScaleRowDown34_C(const uint8* src_ptr, +void ScaleRowDown34_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { int x; (void)src_stride; @@ -286,9 +286,9 @@ void ScaleRowDown34_C(const uint8* src_ptr, } } -void ScaleRowDown34_16_C(const uint16* src_ptr, +void ScaleRowDown34_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { int x; (void)src_stride; @@ -303,21 +303,21 @@ void ScaleRowDown34_16_C(const uint16* src_ptr, } // Filter rows 0 and 1 together, 3 : 1 -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, +void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* d, + uint8_t* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 * 3 + b0 + 2) >> 2; d[1] = (a1 * 3 + b1 + 2) >> 2; d[2] = (a2 * 3 + b2 + 2) >> 2; @@ -327,21 +327,21 @@ void ScaleRowDown34_0_Box_C(const uint8* src_ptr, } } -void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, +void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* d, + uint16_t* d, int dst_width) { - const uint16* s = src_ptr; - const uint16* t = src_ptr + src_stride; + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 * 3 + b0 + 2) >> 2; d[1] = (a1 * 3 + b1 + 2) >> 2; d[2] = (a2 * 3 + b2 + 2) >> 2; @@ -352,21 +352,21 @@ void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, } // Filter rows 1 and 2 together, 1 : 1 -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, +void 
ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* d, + uint8_t* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 + b0 + 1) >> 1; d[1] = (a1 + b1 + 1) >> 1; d[2] = (a2 + b2 + 1) >> 1; @@ -376,21 +376,21 @@ void ScaleRowDown34_1_Box_C(const uint8* src_ptr, } } -void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, +void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* d, + uint16_t* d, int dst_width) { - const uint16* s = src_ptr; - const uint16* t = src_ptr + src_stride; + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 + b0 + 1) >> 1; d[1] = (a1 + b1 + 1) >> 1; d[2] = (a2 + b2 + 1) >> 1; @@ -401,8 +401,8 @@ void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, } // Scales a single row of pixels using point sampling. -void ScaleCols_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { @@ -419,8 +419,8 @@ void ScaleCols_C(uint8* dst_ptr, } } -void ScaleCols_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, int x, int dx) { @@ -438,8 +438,8 @@ void ScaleCols_16_C(uint16* dst_ptr, } // Scales a single row of pixels up by 2x using point sampling. -void ScaleColsUp2_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleColsUp2_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { @@ -456,8 +456,8 @@ void ScaleColsUp2_C(uint8* dst_ptr, } } -void ScaleColsUp2_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleColsUp2_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, int x, int dx) { @@ -477,15 +477,15 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, // (1-f)a + fb can be replaced with a + f(b-a) #if defined(__arm__) || defined(__aarch64__) #define BLENDER(a, b, f) \ - (uint8)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) + (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) #else // Intel uses 7 bit math with rounding. 
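// Worked example of the Intel path (illustrative): a = 10, b = 250 and a
// half-way 16.16 fraction f = 0x8000 give f >> 9 = 0x40, so
//   10 + ((0x40 * (250 - 10) + 0x40) >> 7)
//     = 10 + ((15360 + 64) >> 7) = 10 + 120 = 130,  i.e. ~(a + b) / 2.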
#define BLENDER(a, b, f) \ - (uint8)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) + (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) #endif -void ScaleFilterCols_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { @@ -511,15 +511,15 @@ void ScaleFilterCols_C(uint8* dst_ptr, } } -void ScaleFilterCols64_C(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols64_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x32, int dx) { - int64 x = (int64)(x32); + int64_t x = (int64_t)(x32); int j; for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -532,7 +532,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr, dst_ptr += 2; } if (dst_width & 1) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -540,12 +540,12 @@ void ScaleFilterCols64_C(uint8* dst_ptr, } #undef BLENDER -// Same as 8 bit arm blender but return is cast to uint16 +// Same as 8 bit arm blender but return is cast to uint16_t #define BLENDER(a, b, f) \ - (uint16)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) + (uint16_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) -void ScaleFilterCols_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleFilterCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, int x, int dx) { @@ -571,15 +571,15 @@ void ScaleFilterCols_16_C(uint16* dst_ptr, } } -void ScaleFilterCols64_16_C(uint16* dst_ptr, - const uint16* src_ptr, +void ScaleFilterCols64_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, int dst_width, int x32, int dx) { - int64 x = (int64)(x32); + int64_t x = (int64_t)(x32); int j; for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -592,7 +592,7 @@ void ScaleFilterCols64_16_C(uint16* dst_ptr, dst_ptr += 2; } if (dst_width & 1) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -600,9 +600,9 @@ void ScaleFilterCols64_16_C(uint16* dst_ptr, } #undef BLENDER -void ScaleRowDown38_C(const uint8* src_ptr, +void ScaleRowDown38_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { int x; (void)src_stride; @@ -616,9 +616,9 @@ void ScaleRowDown38_C(const uint8* src_ptr, } } -void ScaleRowDown38_16_C(const uint16* src_ptr, +void ScaleRowDown38_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { int x; (void)src_stride; @@ -633,9 +633,9 @@ void ScaleRowDown38_16_C(const uint16* src_ptr, } // 8x3 -> 3x1 -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, +void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; @@ -663,9 +663,9 @@ void ScaleRowDown38_3_Box_C(const uint8* src_ptr, } } -void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, +void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, + uint16_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; @@ -694,9 +694,9 @@ void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, } // 
8x2 -> 3x1 -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, +void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; @@ -719,9 +719,9 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, } } -void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, +void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, + uint16_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; @@ -744,7 +744,7 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, } } -void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) { +void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { int x; assert(src_width > 0); for (x = 0; x < src_width - 1; x += 2) { @@ -758,7 +758,9 @@ void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) { } } -void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) { +void ScaleAddRow_16_C(const uint16_t* src_ptr, + uint32_t* dst_ptr, + int src_width) { int x; assert(src_width > 0); for (x = 0; x < src_width - 1; x += 2) { @@ -772,12 +774,12 @@ void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) { } } -void ScaleARGBRowDown2_C(const uint8* src_argb, +void ScaleARGBRowDown2_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int x; (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { @@ -791,9 +793,9 @@ void ScaleARGBRowDown2_C(const uint8* src_argb, } } -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, +void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { int x; (void)src_stride; @@ -807,9 +809,9 @@ void ScaleARGBRowDown2Linear_C(const uint8* src_argb, } } -void ScaleARGBRowDown2Box_C(const uint8* src_argb, +void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { int x; for (x = 0; x < dst_width; ++x) { @@ -830,13 +832,13 @@ void ScaleARGBRowDown2Box_C(const uint8* src_argb, } } -void ScaleARGBRowDownEven_C(const uint8* src_argb, +void ScaleARGBRowDownEven_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); (void)src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { @@ -850,10 +852,10 @@ void ScaleARGBRowDownEven_C(const uint8* src_argb, } } -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { int x; for (x = 0; x < dst_width; ++x) { @@ -875,13 +877,13 @@ void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, } // Scales a single row of pixels using point sampling. 
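// Illustrative note: "point sampling" below is nearest-neighbor over whole
// 32-bit pixels. Starting from x = 0, dx = 0x20000 (2.0 in 16.16) reads
// src[0], src[2], src[4], ... (2x shrink), while dx = 0x8000 (0.5) reads
// src[0], src[0], src[1], src[1], ... (2x zoom).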
-void ScaleARGBCols_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { dst[0] = src[x >> 16]; @@ -895,14 +897,14 @@ void ScaleARGBCols_C(uint8* dst_argb, } } -void ScaleARGBCols64_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x32, int dx) { - int64 x = (int64)(x32); - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + int64_t x = (int64_t)(x32); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { dst[0] = src[x >> 16]; @@ -917,13 +919,13 @@ void ScaleARGBCols64_C(uint8* dst_argb, } // Scales a single row of pixels up by 2x using point sampling. -void ScaleARGBColsUp2_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBColsUp2_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; (void)x; (void)dx; @@ -941,24 +943,24 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, // Mimics SSSE3 blender #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7 #define BLENDERC(a, b, f, s) \ - (uint32)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) + (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) #define BLENDER(a, b, f) \ BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \ BLENDERC(a, b, f, 0) -void ScaleARGBFilterCols_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { int xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); x += dx; xi = x >> 16; @@ -972,26 +974,26 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, if (dst_width & 1) { int xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); } } -void ScaleARGBFilterCols64_C(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x32, int dx) { - int64 x = (int64)(x32); - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + int64_t x = (int64_t)(x32); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); x += dx; xi = x >> 16; @@ -1003,10 +1005,10 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, dst += 2; } if (dst_width & 1) { - int64 xi 
= x >> 16; + int64_t xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); } } @@ -1020,8 +1022,8 @@ void ScalePlaneVertical(int src_height, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, - uint8* dst_argb, + const uint8_t* src_argb, + uint8_t* dst_argb, int x, int y, int dy, @@ -1029,7 +1031,7 @@ void ScalePlaneVertical(int src_height, enum FilterMode filtering) { // TODO(fbarchard): Allow higher bpp. int dst_width_bytes = dst_width * bpp; - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; @@ -1090,8 +1092,8 @@ void ScalePlaneVertical_16(int src_height, int dst_height, int src_stride, int dst_stride, - const uint16* src_argb, - uint16* dst_argb, + const uint16_t* src_argb, + uint16_t* dst_argb, int x, int y, int dy, @@ -1099,7 +1101,7 @@ void ScalePlaneVertical_16(int src_height, enum FilterMode filtering) { // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; - void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb, + void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; @@ -1202,12 +1204,12 @@ enum FilterMode ScaleFilterReduce(int src_width, // Divide num by div and return as 16.16 fixed point result. int FixedDiv_C(int num, int div) { - return (int)(((int64)(num) << 16) / div); + return (int)(((int64_t)(num) << 16) / div); } // Divide num by div and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div) { - return (int)((((int64)(num) << 16) - 0x00010001) / (div - 1)); + return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1)); } #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s) @@ -1288,18 +1290,18 @@ void ScaleSlope(int src_width, // Read 8x2 upsample with filtering and write 16x1. // actually reads an extra pixel, so 9x2. 
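// Worked example (illustrative) of the 9:3:3:1 kernel below, whose weights
// sum to 16 and round with +8: p0 = 100, p1 = 200, p2 = 120, p3 = 220 gives
//   dst[0] = (100*9 + 200*3 + 120*3 + 220 + 8) >> 4 = 2088 >> 4 = 130,
// biased three-quarters toward the p0 corner of the 2x2 neighborhood.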
-void ScaleRowUp2_16_C(const uint16* src_ptr, +void ScaleRowUp2_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { - const uint16* src2 = src_ptr + src_stride; + const uint16_t* src2 = src_ptr + src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { - uint16 p0 = src_ptr[0]; - uint16 p1 = src_ptr[1]; - uint16 p2 = src2[0]; - uint16 p3 = src2[1]; + uint16_t p0 = src_ptr[0]; + uint16_t p1 = src_ptr[1]; + uint16_t p2 = src2[0]; + uint16_t p3 = src2[1]; dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4; ++src_ptr; @@ -1307,10 +1309,10 @@ void ScaleRowUp2_16_C(const uint16* src_ptr, dst += 2; } if (dst_width & 1) { - uint16 p0 = src_ptr[0]; - uint16 p1 = src_ptr[1]; - uint16 p2 = src2[0]; - uint16 p3 = src2[1]; + uint16_t p0 = src_ptr[0]; + uint16_t p1 = src_ptr[1]; + uint16_t p2 = src2[0]; + uint16_t p3 = src2[1]; dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; } } diff --git a/chromium/third_party/libyuv/source/scale_gcc.cc b/chromium/third_party/libyuv/source/scale_gcc.cc index 336eb2dba44..312236d2df8 100644 --- a/chromium/third_party/libyuv/source/scale_gcc.cc +++ b/chromium/third_party/libyuv/source/scale_gcc.cc @@ -93,391 +93,386 @@ static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, // Generated using gcc disassembly on Visual C object file: // objdump -D yuvscaler.obj >yuvscaler.txt -void ScaleRowDown2_SSSE3(const uint8* src_ptr, +void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + asm volatile( + // 16 pixel loop. 
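// Illustrative intrinsics equivalent (not from the diff) of the asm loop
// body below: psrlw $0x8 keeps the odd-indexed byte of each 16-bit lane and
// packuswb merges the two registers, so 32 source pixels become 16:
//   __m128i a = _mm_loadu_si128((const __m128i*)src_ptr);
//   __m128i b = _mm_loadu_si128((const __m128i*)(src_ptr + 16));
//   a = _mm_srli_epi16(a, 8);   // high byte of each pair = odd pixels
//   b = _mm_srli_epi16(b, 8);
//   _mm_storeu_si128((__m128i*)dst_ptr, _mm_packus_epi16(a, b));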
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, +void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrlw $0xf,%%xmm4 \n" - "packuswb %%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pavgw %%xmm5,%%xmm0 \n" - "pavgw %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" - ); + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrlw $0xf,%%xmm4 \n" + "packuswb %%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pavgw %%xmm5,%%xmm0 \n" + "pavgw %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } -void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrlw $0xf,%%xmm4 \n" - "packuswb %%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - "psrlw $0x1,%%xmm0 \n" - "psrlw $0x1,%%xmm1 \n" - "pavgw %%xmm5,%%xmm0 \n" - "pavgw %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrlw $0xf,%%xmm4 \n" + "packuswb %%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%3,1),%%xmm2 \n" + "movdqu 0x10(%0,%3,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "psrlw $0x1,%%xmm0 \n" + "psrlw $0x1,%%xmm1 \n" + "pavgw %%xmm5,%%xmm0 \n" + 
"pavgw %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #ifdef HAS_SCALEROWDOWN2_AVX2 -void ScaleRowDown2_AVX2(const uint8* src_ptr, +void ScaleRowDown2_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; - asm volatile ( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + asm volatile( + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, +void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; - asm volatile ( - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $0xf,%%ymm4,%%ymm4 \n" - "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" - - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" - "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" - ); + asm volatile( + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $0xf,%%ymm4,%%ymm4 \n" + "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" + "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } -void ScaleRowDown2Box_AVX2(const uint8* src_ptr, +void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { - asm volatile ( - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $0xf,%%ymm4,%%ymm4 \n" - "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" - "vpxor 
%%ymm5,%%ymm5,%%ymm5 \n" - - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vpsrlw $0x1,%%ymm0,%%ymm0 \n" - "vpsrlw $0x1,%%ymm1,%%ymm1 \n" - "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" - "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + asm volatile( + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $0xf,%%ymm4,%%ymm4 \n" + "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x00(%0,%3,1),%%ymm2 \n" + "vmovdqu 0x20(%0,%3,1),%%ymm3 \n" + "lea 0x40(%0),%0 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vpsrlw $0x1,%%ymm0,%%ymm0 \n" + "vpsrlw $0x1,%%ymm1,%%ymm1 \n" + "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" + "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SCALEROWDOWN2_AVX2 -void ScaleRowDown4_SSSE3(const uint8* src_ptr, +void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrld $0x18,%%xmm5 \n" - "pslld $0x10,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm5" - ); + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrld $0x18,%%xmm5 \n" + "pslld $0x10,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "psrlw $0x8,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm5"); } -void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, 
ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { intptr_t stridex3; - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrlw $0xf,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "packuswb %%xmm4,%%xmm4 \n" - "psllw $0x3,%%xmm5 \n" - "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3 - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - "phaddw %%xmm1,%%xmm0 \n" - "paddw %%xmm5,%%xmm0 \n" - "psrlw $0x4,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "=&r"(stridex3) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrlw $0xf,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "packuswb %%xmm4,%%xmm4 \n" + "psllw $0x3,%%xmm5 \n" + "lea 0x00(%4,%4,2),%3 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%4,1),%%xmm2 \n" + "movdqu 0x10(%0,%4,1),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "movdqu 0x00(%0,%4,2),%%xmm2 \n" + "movdqu 0x10(%0,%4,2),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "movdqu 0x00(%0,%3,1),%%xmm2 \n" + "movdqu 0x10(%0,%3,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "phaddw %%xmm1,%%xmm0 \n" + "paddw %%xmm5,%%xmm0 \n" + "psrlw $0x4,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "=&r"(stridex3) // %3 + : "r"((intptr_t)(src_stride)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #ifdef HAS_SCALEROWDOWN4_AVX2 -void ScaleRowDown4_AVX2(const uint8* src_ptr, +void ScaleRowDown4_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrld $0x18,%%ymm5,%%ymm5 \n" - "vpslld $0x10,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq 
$0xd8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm5" - ); + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrld $0x18,%%ymm5,%%ymm5 \n" + "vpslld $0x10,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm5"); } -void ScaleRowDown4Box_AVX2(const uint8* src_ptr, +void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { - asm volatile ( - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $0xf,%%ymm4,%%ymm4 \n" - "vpsllw $0x3,%%ymm4,%%ymm5 \n" - "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" - - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3 - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3 - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" - "vpsrlw $0x4,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "r"((intptr_t)(src_stride * 3)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + asm volatile( + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $0xf,%%ymm4,%%ymm4 \n" + "vpsllw $0x3,%%ymm4,%%ymm5 \n" + "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x00(%0,%3,1),%%ymm2 \n" + "vmovdqu 0x20(%0,%3,1),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw 
%%ymm3,%%ymm1,%%ymm1 \n" + "vmovdqu 0x00(%0,%3,2),%%ymm2 \n" + "vmovdqu 0x20(%0,%3,2),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vmovdqu 0x00(%0,%4,1),%%ymm2 \n" + "vmovdqu 0x20(%0,%4,1),%%ymm3 \n" + "lea 0x40(%0),%0 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" + "vpsrlw $0x4,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "r"((intptr_t)(src_stride * 3)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_SCALEROWDOWN4_AVX2 -void ScaleRowDown34_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( @@ -489,33 +484,34 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, "m"(kShuf1), // %1 "m"(kShuf2) // %2 ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm2 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "palignr $0x8,%%xmm0,%%xmm1 \n" - "pshufb %%xmm3,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "pshufb %%xmm5,%%xmm2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movq %%xmm1," MEMACCESS2(0x8,1) " \n" - "movq %%xmm2," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm2 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "palignr $0x8,%%xmm0,%%xmm1 \n" + "pshufb %%xmm3,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "pshufb %%xmm5,%%xmm2 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x8(%1) \n" + "movq %%xmm2,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x18,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" // kShuf01 @@ -535,53 +531,53 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, "m"(kMadd11), // %1 "m"(kRound34) // %2 ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS(1) " \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm0,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," 
MEMACCESS2(0x8,1) " \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm4,%%xmm6 \n" - "pmaddubsw %4,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "m"(kMadd21) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x00(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm5,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,(%1) \n" + "movdqu 0x8(%0),%%xmm6 \n" + "movdqu 0x8(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm3,%%xmm6 \n" + "pmaddubsw %%xmm0,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,0x8(%1) \n" + "movdqu 0x10(%0),%%xmm6 \n" + "movdqu 0x10(%0,%3,1),%%xmm7 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm4,%%xmm6 \n" + "pmaddubsw %4,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x18,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "m"(kMadd21) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" // kShuf01 @@ -602,88 +598,87 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, "m"(kRound34) // %2 ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3,1),%%xmm7 - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS(1) " \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7 - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm0,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x8,1) " \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm4,%%xmm6 \n" - "pmaddubsw %4,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "m"(kMadd21) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + asm volatile( + + LABELALIGN + "1: \n" + 
"movdqu (%0),%%xmm6 \n" + "movdqu 0x00(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm5,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,(%1) \n" + "movdqu 0x8(%0),%%xmm6 \n" + "movdqu 0x8(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm3,%%xmm6 \n" + "pmaddubsw %%xmm0,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,0x8(%1) \n" + "movdqu 0x10(%0),%%xmm6 \n" + "movdqu 0x10(%0,%3,1),%%xmm7 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm4,%%xmm6 \n" + "pmaddubsw %4,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x18,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "m"(kMadd21) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } -void ScaleRowDown38_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movhlps %%xmm0,%%xmm1 \n" - "movd %%xmm1," MEMACCESS2(0x8,1) " \n" - "lea " MEMLEA(0xc,1) ",%1 \n" - "sub $0xc,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "m"(kShuf38a), // %3 - "m"(kShuf38b) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" - ); + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pshufb %%xmm4,%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "movhlps %%xmm0,%%xmm1 \n" + "movd %%xmm1,0x8(%1) \n" + "lea 0xc(%1),%1 \n" + "sub $0xc,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "m"(kShuf38a), // %3 + "m"(kShuf38b) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" @@ -696,40 +691,39 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, "m"(kShufAb2), // %2 "m"(kScaleAb2) // %3 ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pshufb %%xmm2,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "paddusw %%xmm6,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "paddusw %%xmm0,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movd %%xmm1," MEMACCESS(1) " \n" - "psrlq $0x10,%%xmm1 \n" - "movd %%xmm1," MEMACCESS2(0x2,1) " \n" - "lea " MEMLEA(0x6,1) ",%1 \n" - "sub $0x6,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), 
// %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%3,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "pavgb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pshufb %%xmm2,%%xmm1 \n" + "movdqa %%xmm0,%%xmm6 \n" + "pshufb %%xmm3,%%xmm6 \n" + "paddusw %%xmm6,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "paddusw %%xmm0,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movd %%xmm1,(%1) \n" + "psrlq $0x10,%%xmm1 \n" + "movd %%xmm1,0x2(%1) \n" + "lea 0x6(%1),%1 \n" + "sub $0x6,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" @@ -741,112 +735,117 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, "m"(kShufAc3), // %1 "m"(kScaleAc33) // %2 ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm6) // movdqu (%0,%3,1),%%xmm6 - "movhlps %%xmm0,%%xmm1 \n" - "movhlps %%xmm6,%%xmm7 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm6 \n" - "punpcklbw %%xmm5,%%xmm7 \n" - "paddusw %%xmm6,%%xmm0 \n" - "paddusw %%xmm7,%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movhlps %%xmm6,%%xmm7 \n" - "punpcklbw %%xmm5,%%xmm6 \n" - "punpcklbw %%xmm5,%%xmm7 \n" - "paddusw %%xmm6,%%xmm0 \n" - "paddusw %%xmm7,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "psrldq $0x2,%%xmm0 \n" - "paddusw %%xmm0,%%xmm6 \n" - "psrldq $0x2,%%xmm0 \n" - "paddusw %%xmm0,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "movdqa %%xmm1,%%xmm7 \n" - "psrldq $0x2,%%xmm1 \n" - "paddusw %%xmm1,%%xmm7 \n" - "psrldq $0x2,%%xmm1 \n" - "paddusw %%xmm1,%%xmm7 \n" - "pshufb %%xmm3,%%xmm7 \n" - "paddusw %%xmm7,%%xmm6 \n" - "pmulhuw %%xmm4,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movd %%xmm6," MEMACCESS(1) " \n" - "psrlq $0x10,%%xmm6 \n" - "movd %%xmm6," MEMACCESS2(0x2,1) " \n" - "lea " MEMLEA(0x6,1) ",%1 \n" - "sub $0x6,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%3,1),%%xmm6 \n" + "movhlps %%xmm0,%%xmm1 \n" + "movhlps %%xmm6,%%xmm7 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm6 \n" + "punpcklbw %%xmm5,%%xmm7 \n" + "paddusw %%xmm6,%%xmm0 \n" + "paddusw %%xmm7,%%xmm1 \n" + "movdqu 0x00(%0,%3,2),%%xmm6 \n" + "lea 0x10(%0),%0 \n" + "movhlps %%xmm6,%%xmm7 \n" + "punpcklbw %%xmm5,%%xmm6 \n" + "punpcklbw %%xmm5,%%xmm7 \n" + "paddusw %%xmm6,%%xmm0 \n" + "paddusw %%xmm7,%%xmm1 \n" + "movdqa %%xmm0,%%xmm6 \n" + "psrldq $0x2,%%xmm0 \n" + "paddusw %%xmm0,%%xmm6 \n" + "psrldq $0x2,%%xmm0 \n" + "paddusw %%xmm0,%%xmm6 \n" + "pshufb %%xmm2,%%xmm6 \n" + "movdqa %%xmm1,%%xmm7 \n" + "psrldq $0x2,%%xmm1 \n" + "paddusw %%xmm1,%%xmm7 \n" + "psrldq $0x2,%%xmm1 \n" + "paddusw %%xmm1,%%xmm7 \n" + "pshufb %%xmm3,%%xmm7 \n" + "paddusw 
%%xmm7,%%xmm6 \n" + "pmulhuw %%xmm4,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movd %%xmm6,(%1) \n" + "psrlq $0x10,%%xmm6 \n" + "movd %%xmm6,0x2(%1) \n" + "lea 0x6(%1),%1 \n" + "sub $0x6,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } // Reads 16xN bytes and produces 16 shorts at a time. -void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "movdqa %%xmm3,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "punpckhbw %%xmm5,%%xmm3 \n" - "paddusw %%xmm2,%%xmm0 \n" - "paddusw %%xmm3,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(src_width) // %2 - : - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); +void ScaleAddRow_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width) { + asm volatile( + + "pxor %%xmm5,%%xmm5 \n" + + // 16 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm3 \n" + "lea 0x10(%0),%0 \n" // src_ptr += 16 + "movdqu (%1),%%xmm0 \n" + "movdqu 0x10(%1),%%xmm1 \n" + "movdqa %%xmm3,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpckhbw %%xmm5,%%xmm3 \n" + "paddusw %%xmm2,%%xmm0 \n" + "paddusw %%xmm3,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(src_width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #ifdef HAS_SCALEADDROW_AVX2 // Reads 32 bytes and accumulates to 32 shorts at a time. 
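A scalar sketch of what these ScaleAddRow kernels accumulate, along the lines of ScaleAddRow_C in scale_common.cc: each source byte is widened and added into a running 16-bit row sum. The function name here is illustrative only; note that the SSE2/AVX2 versions above and below use saturating adds (paddusw/vpaddusw), while this sketch wraps.

#include <cstdint>

static void ScaleAddRowScalar(const uint8_t* src_ptr, uint16_t* dst_ptr,
                              int src_width) {
  for (int x = 0; x < src_width; ++x) {
    // Widen the byte and accumulate; the SIMD kernels saturate instead.
    dst_ptr[x] = (uint16_t)(dst_ptr[x] + src_ptr[x]);
  }
}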
-void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { - asm volatile ( - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" - - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32 - "vpermq $0xd8,%%ymm3,%%ymm3 \n" - "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" - "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" - "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n" - "vpaddusw " MEMACCESS2(0x20,1) ",%%ymm3,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(src_width) // %2 - : - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); +void ScaleAddRow_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width) { + asm volatile( + + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm3 \n" + "lea 0x20(%0),%0 \n" // src_ptr += 32 + "vpermq $0xd8,%%ymm3,%%ymm3 \n" + "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" + "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" + "vpaddusw (%1),%%ymm2,%%ymm0 \n" + "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(src_width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SCALEADDROW_AVX2 @@ -860,394 +859,393 @@ static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. -void ScaleFilterCols_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { intptr_t x0, x1, temp_pixel; - asm volatile ( - "movd %6,%%xmm2 \n" - "movd %7,%%xmm3 \n" - "movl $0x04040000,%k2 \n" - "movd %k2,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x9,%%xmm6 \n" // 0x007f007f - "pcmpeqb %%xmm7,%%xmm7 \n" - "psrlw $15,%%xmm7 \n" // 0x00010001 - - "pextrw $0x1,%%xmm2,%k3 \n" - "subl $0x2,%5 \n" - "jl 29f \n" - "movdqa %%xmm2,%%xmm0 \n" - "paddd %%xmm3,%%xmm0 \n" - "punpckldq %%xmm0,%%xmm2 \n" - "punpckldq %%xmm3,%%xmm3 \n" - "paddd %%xmm3,%%xmm3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - - LABELALIGN - "2: \n" - "movdqa %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm2 \n" - MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 - "movd %k2,%%xmm0 \n" - "psrlw $0x9,%%xmm1 \n" - MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2 - "movd %k2,%%xmm4 \n" - "pshufb %%xmm5,%%xmm1 \n" - "punpcklwd %%xmm4,%%xmm0 \n" - "psubb %8,%%xmm0 \n" // make pixels signed. - "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) + 1 - "paddusb %%xmm7,%%xmm1 \n" - "pmaddubsw %%xmm0,%%xmm1 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - "paddw %9,%%xmm1 \n" // make pixels unsigned. - "psrlw $0x7,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movd %%xmm1,%k2 \n" - "mov %w2," MEMACCESS(0) " \n" - "lea " MEMLEA(0x2,0) ",%0 \n" - "subl $0x2,%5 \n" - "jge 2b \n" - - LABELALIGN - "29: \n" - "addl $0x1,%5 \n" - "jl 99f \n" - MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 - "movd %k2,%%xmm0 \n" - "psrlw $0x9,%%xmm2 \n" - "pshufb %%xmm5,%%xmm2 \n" - "psubb %8,%%xmm0 \n" // make pixels signed. - "pxor %%xmm6,%%xmm2 \n" - "paddusb %%xmm7,%%xmm2 \n" - "pmaddubsw %%xmm0,%%xmm2 \n" - "paddw %9,%%xmm2 \n" // make pixels unsigned. 
- "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm2 \n" - "movd %%xmm2,%k2 \n" - "mov %b2," MEMACCESS(0) " \n" - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "=&a"(temp_pixel), // %2 - "=&r"(x0), // %3 - "=&r"(x1), // %4 + asm volatile( + "movd %6,%%xmm2 \n" + "movd %7,%%xmm3 \n" + "movl $0x04040000,%k2 \n" + "movd %k2,%%xmm5 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrlw $0x9,%%xmm6 \n" // 0x007f007f + "pcmpeqb %%xmm7,%%xmm7 \n" + "psrlw $15,%%xmm7 \n" // 0x00010001 + + "pextrw $0x1,%%xmm2,%k3 \n" + "subl $0x2,%5 \n" + "jl 29f \n" + "movdqa %%xmm2,%%xmm0 \n" + "paddd %%xmm3,%%xmm0 \n" + "punpckldq %%xmm0,%%xmm2 \n" + "punpckldq %%xmm3,%%xmm3 \n" + "paddd %%xmm3,%%xmm3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" + + LABELALIGN + "2: \n" + "movdqa %%xmm2,%%xmm1 \n" + "paddd %%xmm3,%%xmm2 \n" + "movzwl 0x00(%1,%3,1),%k2 \n" + "movd %k2,%%xmm0 \n" + "psrlw $0x9,%%xmm1 \n" + "movzwl 0x00(%1,%4,1),%k2 \n" + "movd %k2,%%xmm4 \n" + "pshufb %%xmm5,%%xmm1 \n" + "punpcklwd %%xmm4,%%xmm0 \n" + "psubb %8,%%xmm0 \n" // make pixels signed. + "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) + + // 1 + "paddusb %%xmm7,%%xmm1 \n" + "pmaddubsw %%xmm0,%%xmm1 \n" + "pextrw $0x1,%%xmm2,%k3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" + "paddw %9,%%xmm1 \n" // make pixels unsigned. + "psrlw $0x7,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movd %%xmm1,%k2 \n" + "mov %w2,(%0) \n" + "lea 0x2(%0),%0 \n" + "subl $0x2,%5 \n" + "jge 2b \n" + + LABELALIGN + "29: \n" + "addl $0x1,%5 \n" + "jl 99f \n" + "movzwl 0x00(%1,%3,1),%k2 \n" + "movd %k2,%%xmm0 \n" + "psrlw $0x9,%%xmm2 \n" + "pshufb %%xmm5,%%xmm2 \n" + "psubb %8,%%xmm0 \n" // make pixels signed. + "pxor %%xmm6,%%xmm2 \n" + "paddusb %%xmm7,%%xmm2 \n" + "pmaddubsw %%xmm0,%%xmm2 \n" + "paddw %9,%%xmm2 \n" // make pixels unsigned. + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm2 \n" + "movd %%xmm2,%k2 \n" + "mov %b2,(%0) \n" + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "=&a"(temp_pixel), // %2 + "=&r"(x0), // %3 + "=&r"(x1), // %4 #if defined(__x86_64__) - "+rm"(dst_width) // %5 + "+rm"(dst_width) // %5 #else - "+m"(dst_width) // %5 + "+m"(dst_width) // %5 #endif - : "rm"(x), // %6 - "rm"(dx), // %7 + : "rm"(x), // %6 + "rm"(dx), // %7 #if defined(__x86_64__) - "x"(kFsub80), // %8 - "x"(kFadd40) // %9 + "x"(kFsub80), // %8 + "x"(kFadd40) // %9 #else - "m"(kFsub80), // %8 - "m"(kFadd40) // %9 + "m"(kFsub80), // %8 + "m"(kFadd40) // %9 #endif - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
-void ScaleColsUp2_SSE2(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleColsUp2_SSE2(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { (void)x; (void)dx; - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(0) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%1),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "movdqu %%xmm0,(%0) \n" + "movdqu %%xmm1,0x10(%0) \n" + "lea 0x20(%0),%0 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { (void)src_stride; - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "shufps $0xdd,%%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "shufps $0xdd,%%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { (void)src_stride; - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 - 
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%3,1),%%xmm2 \n" + "movdqu 0x10(%0,%3,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } // Reads 4 pixels at a time. // Alignment requirement: dst_argb 16 byte aligned. -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, +void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { intptr_t src_stepx_x4 = (intptr_t)(src_stepx); intptr_t src_stepx_x12; (void)src_stride; - asm volatile ( - "lea " MEMLEA3(0x00,1,4) ",%1 \n" - "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" - LABELALIGN - "1: \n" - "movd " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 - "punpckldq %%xmm1,%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2 - MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3 - "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" - "punpckldq %%xmm3,%%xmm2 \n" - "punpcklqdq %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stepx_x4), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width), // %3 - "=&r"(src_stepx_x12) // %4 - :: "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + asm volatile( + "lea 0x00(,%1,4),%1 \n" + "lea 0x00(%1,%1,2),%4 \n" + + LABELALIGN + "1: \n" + "movd (%0),%%xmm0 \n" + "movd 0x00(%0,%1,1),%%xmm1 \n" + "punpckldq %%xmm1,%%xmm0 \n" + "movd 0x00(%0,%1,2),%%xmm2 \n" + "movd 0x00(%0,%4,1),%%xmm3 \n" + "lea 0x00(%0,%1,4),%0 \n" + "punpckldq %%xmm3,%%xmm2 \n" + "punpcklqdq %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(src_stepx_x4), // %1 + "+r"(dst_argb), // %2 + "+r"(dst_width), // %3 + "=&r"(src_stepx_x12) // %4 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } // Blends four 2x2 to 4x1. // Alignment requirement: dst_argb 16 byte aligned. 
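Per channel, the 2x2 box blend in the hunk below is built from two rounds of pavgb, which computes (a + b + 1) >> 1: first the two rows are averaged vertically, then shufps splits even/odd pixels and a second pavgb averages horizontally. A scalar sketch; it rounds slightly differently than a single exact (sum + 2) >> 2:

#include <cstdint>

static inline uint8_t Avg(uint8_t a, uint8_t b) {
  return (uint8_t)((a + b + 1) >> 1);  // matches pavgb rounding
}

static inline uint8_t Box2x2(uint8_t p00, uint8_t p01, uint8_t p10,
                             uint8_t p11) {
  // Average vertically, then horizontally, as the pavgb/shufps sequence does.
  return Avg(Avg(p00, p10), Avg(p01, p11));
}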
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { intptr_t src_stepx_x4 = (intptr_t)(src_stepx); intptr_t src_stepx_x12; intptr_t row1 = (intptr_t)(src_stride); - asm volatile ( - "lea " MEMLEA3(0x00,1,4) ",%1 \n" - "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" - "lea " MEMLEA4(0x00,0,5,1) ",%5 \n" - - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0 - MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1 - MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1 - "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" - "movq " MEMACCESS(5) ",%%xmm2 \n" - MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2 - MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3 - MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3 - "lea " MEMLEA4(0x00,5,1,4) ",%5 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stepx_x4), // %1 - "+r"(dst_argb), // %2 - "+rm"(dst_width), // %3 - "=&r"(src_stepx_x12), // %4 - "+r"(row1) // %5 - :: "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + asm volatile( + "lea 0x00(,%1,4),%1 \n" + "lea 0x00(%1,%1,2),%4 \n" + "lea 0x00(%0,%5,1),%5 \n" + + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "movhps 0x00(%0,%1,1),%%xmm0 \n" + "movq 0x00(%0,%1,2),%%xmm1 \n" + "movhps 0x00(%0,%4,1),%%xmm1 \n" + "lea 0x00(%0,%1,4),%0 \n" + "movq (%5),%%xmm2 \n" + "movhps 0x00(%5,%1,1),%%xmm2 \n" + "movq 0x00(%5,%1,2),%%xmm3 \n" + "movhps 0x00(%5,%4,1),%%xmm3 \n" + "lea 0x00(%5,%1,4),%5 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(src_stepx_x4), // %1 + "+r"(dst_argb), // %2 + "+rm"(dst_width), // %3 + "=&r"(src_stepx_x12), // %4 + "+r"(row1) // %5 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } -void ScaleARGBCols_SSE2(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { intptr_t x0, x1; - asm volatile ( - "movd %5,%%xmm2 \n" - "movd %6,%%xmm3 \n" - "pshufd $0x0,%%xmm2,%%xmm2 \n" - "pshufd $0x11,%%xmm3,%%xmm0 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm3 \n" - "pshufd $0x5,%%xmm3,%%xmm0 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pextrw $0x1,%%xmm2,%k0 \n" - "pextrw $0x3,%%xmm2,%k1 \n" - "cmp $0x0,%4 \n" - "jl 99f \n" - "sub $0x4,%4 \n" - "jl 49f \n" - - LABELALIGN - "40: \n" - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1 - "pextrw $0x5,%%xmm2,%k0 \n" - "pextrw $0x7,%%xmm2,%k1 \n" - "paddd %%xmm3,%%xmm2 \n" - "punpckldq %%xmm1,%%xmm0 \n" - MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1 - MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4 - "pextrw $0x1,%%xmm2,%k0 \n" - "pextrw $0x3,%%xmm2,%k1 \n" - "punpckldq %%xmm4,%%xmm1 \n" - "punpcklqdq %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " 
MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%4 \n" - "jge 40b \n" - - "49: \n" - "test $0x2,%4 \n" - "je 29f \n" - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1 - "pextrw $0x5,%%xmm2,%k0 \n" - "punpckldq %%xmm1,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x8,2) ",%2 \n" - "29: \n" - "test $0x1,%4 \n" - "je 99f \n" - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - "movd %%xmm0," MEMACCESS(2) " \n" - "99: \n" - : "=&a"(x0), // %0 - "=&d"(x1), // %1 - "+r"(dst_argb), // %2 - "+r"(src_argb), // %3 - "+r"(dst_width) // %4 - : "rm"(x), // %5 - "rm"(dx) // %6 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); + asm volatile( + "movd %5,%%xmm2 \n" + "movd %6,%%xmm3 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + "pshufd $0x11,%%xmm3,%%xmm0 \n" + "paddd %%xmm0,%%xmm2 \n" + "paddd %%xmm3,%%xmm3 \n" + "pshufd $0x5,%%xmm3,%%xmm0 \n" + "paddd %%xmm0,%%xmm2 \n" + "paddd %%xmm3,%%xmm3 \n" + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pextrw $0x1,%%xmm2,%k0 \n" + "pextrw $0x3,%%xmm2,%k1 \n" + "cmp $0x0,%4 \n" + "jl 99f \n" + "sub $0x4,%4 \n" + "jl 49f \n" + + LABELALIGN + "40: \n" + "movd 0x00(%3,%0,4),%%xmm0 \n" + "movd 0x00(%3,%1,4),%%xmm1 \n" + "pextrw $0x5,%%xmm2,%k0 \n" + "pextrw $0x7,%%xmm2,%k1 \n" + "paddd %%xmm3,%%xmm2 \n" + "punpckldq %%xmm1,%%xmm0 \n" + "movd 0x00(%3,%0,4),%%xmm1 \n" + "movd 0x00(%3,%1,4),%%xmm4 \n" + "pextrw $0x1,%%xmm2,%k0 \n" + "pextrw $0x3,%%xmm2,%k1 \n" + "punpckldq %%xmm4,%%xmm1 \n" + "punpcklqdq %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%4 \n" + "jge 40b \n" + + "49: \n" + "test $0x2,%4 \n" + "je 29f \n" + "movd 0x00(%3,%0,4),%%xmm0 \n" + "movd 0x00(%3,%1,4),%%xmm1 \n" + "pextrw $0x5,%%xmm2,%k0 \n" + "punpckldq %%xmm1,%%xmm0 \n" + "movq %%xmm0,(%2) \n" + "lea 0x8(%2),%2 \n" + "29: \n" + "test $0x1,%4 \n" + "je 99f \n" + "movd 0x00(%3,%0,4),%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "99: \n" + : "=&a"(x0), // %0 + "=&d"(x1), // %1 + "+r"(dst_argb), // %2 + "+r"(src_argb), // %3 + "+r"(dst_width) // %4 + : "rm"(x), // %5 + "rm"(dx) // %6 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
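The ARGB variant below applies the same duplication idea to whole 32-bit pixels (punpckldq/punpckhdq) rather than bytes. A scalar sketch, again with an illustrative name and an even dst_width assumed; the real kernel takes uint8_t pointers and ignores x/dx:

#include <cstdint>

static void ARGBColsUp2Scalar(uint32_t* dst, const uint32_t* src,
                              int dst_width) {
  for (int x = 0; x < dst_width; x += 2) {
    // Write each 32-bit ARGB pixel twice.
    dst[x] = dst[x + 1] = src[x / 2];
  }
}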
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { (void)x; (void)dx; - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpckldq %%xmm0,%%xmm0 \n" - "punpckhdq %%xmm1,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(0) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", NACL_R14 - "xmm0", "xmm1" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%1),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpckldq %%xmm0,%%xmm0 \n" + "punpckhdq %%xmm1,%%xmm1 \n" + "movdqu %%xmm0,(%0) \n" + "movdqu %%xmm1,0x10(%0) \n" + "lea 0x20(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + + : "+r"(dst_argb), // %0 + "+r"(src_argb), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } // Shuffle table for arranging 2 pixels into pairs for pmaddubsw @@ -1262,8 +1260,8 @@ static const uvec8 kShuffleFractions = { }; // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { @@ -1276,67 +1274,65 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, "m"(kShuffleFractions) // %1 ); - asm volatile ( - "movd %5,%%xmm2 \n" - "movd %6,%%xmm3 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x9,%%xmm6 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "sub $0x2,%2 \n" - "jl 29f \n" - "movdqa %%xmm2,%%xmm0 \n" - "paddd %%xmm3,%%xmm0 \n" - "punpckldq %%xmm0,%%xmm2 \n" - "punpckldq %%xmm3,%%xmm3 \n" - "paddd %%xmm3,%%xmm3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - - LABELALIGN - "2: \n" - "movdqa %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm2 \n" - MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 - "psrlw $0x9,%%xmm1 \n" - MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0 - "pshufb %%xmm5,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pxor %%xmm6,%%xmm1 \n" - "pmaddubsw %%xmm1,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(0) " \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "sub $0x2,%2 \n" - "jge 2b \n" - - LABELALIGN - "29: \n" - "add $0x1,%2 \n" - "jl 99f \n" - "psrlw $0x9,%%xmm2 \n" - MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 - "pshufb %%xmm5,%%xmm2 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pxor %%xmm6,%%xmm2 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(0) " \n" - - LABELALIGN - "99: \n" - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+rm"(dst_width), // %2 - "=&r"(x0), // %3 - "=&r"(x1) // %4 - : "rm"(x), // %5 - "rm"(dx) // %6 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + asm volatile( + "movd %5,%%xmm2 \n" + "movd %6,%%xmm3 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrlw $0x9,%%xmm6 \n" + "pextrw $0x1,%%xmm2,%k3 \n" + "sub $0x2,%2 \n" + "jl 29f \n" + "movdqa %%xmm2,%%xmm0 \n" + "paddd %%xmm3,%%xmm0 \n" + "punpckldq %%xmm0,%%xmm2 \n" + "punpckldq %%xmm3,%%xmm3 \n" + "paddd %%xmm3,%%xmm3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" + + LABELALIGN + "2: \n" + "movdqa %%xmm2,%%xmm1 \n" + "paddd %%xmm3,%%xmm2 \n" + "movq 0x00(%1,%3,4),%%xmm0 \n" + "psrlw 
$0x9,%%xmm1 \n" + "movhps 0x00(%1,%4,4),%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "pxor %%xmm6,%%xmm1 \n" + "pmaddubsw %%xmm1,%%xmm0 \n" + "psrlw $0x7,%%xmm0 \n" + "pextrw $0x1,%%xmm2,%k3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%0) \n" + "lea 0x8(%0),%0 \n" + "sub $0x2,%2 \n" + "jge 2b \n" + + LABELALIGN + "29: \n" + "add $0x1,%2 \n" + "jl 99f \n" + "psrlw $0x9,%%xmm2 \n" + "movq 0x00(%1,%3,4),%%xmm0 \n" + "pshufb %%xmm5,%%xmm2 \n" + "pshufb %%xmm4,%%xmm0 \n" + "pxor %%xmm6,%%xmm2 \n" + "pmaddubsw %%xmm2,%%xmm0 \n" + "psrlw $0x7,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movd %%xmm0,(%0) \n" + + LABELALIGN "99: \n" // clang-format error. + + : "+r"(dst_argb), // %0 + "+r"(src_argb), // %1 + "+rm"(dst_width), // %2 + "=&r"(x0), // %3 + "=&r"(x1) // %4 + : "rm"(x), // %5 + "rm"(dx) // %6 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } // Divide num by div and return as 16.16 fixed point result. diff --git a/chromium/third_party/libyuv/source/scale_msa.cc b/chromium/third_party/libyuv/source/scale_msa.cc index df1f482be6d..482a521f0d2 100644 --- a/chromium/third_party/libyuv/source/scale_msa.cc +++ b/chromium/third_party/libyuv/source/scale_msa.cc @@ -127,13 +127,13 @@ void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, } } -void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { int x; - const uint8* nxt_argb = src_argb + src_stride; + const uint8_t* nxt_argb = src_argb + src_stride; int32_t stepx = src_stepx * 4; int64_t data0, data1, data2, data3; v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0}; @@ -553,8 +553,8 @@ void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { } } -void ScaleFilterCols_MSA(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { @@ -630,13 +630,13 @@ void ScaleFilterCols_MSA(uint8* dst_ptr, } } -void ScaleARGBCols_MSA(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; v4i32 x_vec = __msa_fill_w(x); v4i32 dx_vec = __msa_fill_w(dx); @@ -657,12 +657,12 @@ void ScaleARGBCols_MSA(uint8* dst_argb, } } -void ScaleARGBFilterCols_MSA(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); + const uint32_t* src = (const uint32_t*)(src_argb); int j; v4u32 src0, src1, src2, src3; v4u32 vec0, vec1, vec2, vec3; @@ -722,9 +722,9 @@ void ScaleARGBFilterCols_MSA(uint8* dst_argb, } } -void ScaleRowDown34_MSA(const uint8* src_ptr, +void ScaleRowDown34_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { int x; (void)src_stride; @@ -753,12 +753,12 @@ void ScaleRowDown34_MSA(const uint8* src_ptr, } } -void ScaleRowDown34_0_Box_MSA(const uint8* src_ptr, +void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* d, + uint8_t* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; + const uint8_t* s = 
src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2; v16u8 vec0, vec1, vec2, vec3, vec4, vec5; @@ -847,12 +847,12 @@ void ScaleRowDown34_0_Box_MSA(const uint8* src_ptr, } } -void ScaleRowDown34_1_Box_MSA(const uint8* src_ptr, +void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* d, + uint8_t* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2; v16u8 vec0, vec1, vec2, vec3, vec4, vec5; diff --git a/chromium/third_party/libyuv/source/scale_neon.cc b/chromium/third_party/libyuv/source/scale_neon.cc index 46da9d5e272..459a2995dfe 100644 --- a/chromium/third_party/libyuv/source/scale_neon.cc +++ b/chromium/third_party/libyuv/source/scale_neon.cc @@ -23,9 +23,9 @@ extern "C" { // Provided by Fritz Koenig // Read 32x1 throw away even pixels, and write 16x1. -void ScaleRowDown2_NEON(const uint8* src_ptr, +void ScaleRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( @@ -44,9 +44,9 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, } // Read 32x1 average down and write 16x1. -void ScaleRowDown2Linear_NEON(const uint8* src_ptr, +void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( @@ -65,9 +65,9 @@ void ScaleRowDown2Linear_NEON(const uint8* src_ptr, } // Read 32x2 average down and write 16x1. -void ScaleRowDown2Box_NEON(const uint8* src_ptr, +void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer @@ -95,9 +95,9 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ); } -void ScaleRowDown4_NEON(const uint8* src_ptr, +void ScaleRowDown4_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( @@ -113,13 +113,13 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, : "q0", "q1", "memory", "cc"); } -void ScaleRowDown4Box_NEON(const uint8* src_ptr, +void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride; - const uint8* src_ptr2 = src_ptr + src_stride * 2; - const uint8* src_ptr3 = src_ptr + src_stride * 3; + const uint8_t* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr2 = src_ptr + src_stride * 2; + const uint8_t* src_ptr3 = src_ptr + src_stride * 3; asm volatile( "1: \n" "vld1.8 {q0}, [%0]! \n" // load up 16x4 @@ -149,9 +149,9 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. 
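// For reference, the scalar pattern this 3/4 point sample vectorizes: of
// every 4 source pixels, pixels 0, 1 and 3 are kept. A minimal sketch,
// mirroring the C reference (ScaleRowDown34_C) in scale_common.cc;
// dst_width is assumed to be a multiple of 3:
static void ScaleRowDown34_Sketch_C(const uint8_t* src_ptr,
                                    uint8_t* dst,
                                    int dst_width) {
  int x;
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];  // pixel 2 of each group of 4 is dropped
    dst += 3;
    src_ptr += 4;
  }
}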
-void ScaleRowDown34_NEON(const uint8* src_ptr, +void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( @@ -168,9 +168,9 @@ void ScaleRowDown34_NEON(const uint8* src_ptr, : "d0", "d1", "d2", "d3", "memory", "cc"); } -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "vmov.u8 d24, #3 \n" @@ -225,9 +225,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, "cc"); } -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "vmov.u8 d24, #3 \n" @@ -276,9 +276,9 @@ static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18}; // 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, +void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( @@ -299,11 +299,11 @@ void ScaleRowDown38_NEON(const uint8* src_ptr, } // 32x3 -> 12x1 -void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, +void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride * 2; + const uint8_t* src_ptr1 = src_ptr + src_stride * 2; asm volatile( "vld1.16 {q13}, [%5] \n" @@ -411,9 +411,9 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, } // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "vld1.16 {q13}, [%4] \n" @@ -504,12 +504,12 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"); } -void ScaleAddRows_NEON(const uint8* src_ptr, +void ScaleAddRows_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, + uint16_t* dst_ptr, int src_width, int src_height) { - const uint8* src_tmp; + const uint8_t* src_tmp; asm volatile( "1: \n" "mov %0, %1 \n" @@ -547,17 +547,17 @@ void ScaleAddRows_NEON(const uint8* src_ptr, "vld2.8 {d6[" #n "], d7[" #n "]}, [%6] \n" // The NEON version mimics this formula (from row_common.cc): -// #define BLENDER(a, b, f) (uint8)((int)(a) + +// #define BLENDER(a, b, f) (uint8_t)((int)(a) + // ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) -void ScaleFilterCols_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - const uint8* src_tmp = src_ptr; + const uint8_t* src_tmp = src_ptr; asm volatile ( "vdup.32 q0, %3 \n" // x "vdup.32 q1, %4 \n" // dx @@ -615,8 +615,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, #undef LOAD2_DATA8_LANE // 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterRows_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -699,9 +699,9 @@ void ScaleFilterRows_NEON(uint8* dst_ptr, : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"); } -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2_NEON(const 
uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( @@ -727,9 +727,9 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, // 54: f942 038d vst2.32 {d16-d19}, [r2]! // 58: d1f5 bne.n 46 <ScaleARGBRowDown2_C+0x46> -void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, +void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( @@ -749,9 +749,9 @@ void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ); } -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer @@ -786,10 +786,10 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, +void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( @@ -811,10 +811,10 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { asm volatile( "mov r12, %4, lsl #2 \n" @@ -857,13 +857,13 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, "add %3, %3, %4 \n" \ "vld1.32 {" #dn "[" #n "]}, [%6] \n" -void ScaleARGBCols_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { int tmp; - const uint8* src_tmp = src_argb; + const uint8_t* src_tmp = src_argb; asm volatile( "1: \n" // clang-format off @@ -900,14 +900,14 @@ void ScaleARGBCols_NEON(uint8* dst_argb, "add %3, %3, %4 \n" \ "vld2.32 {" #dn1 "[" #n "], " #dn2 "[" #n "]}, [%6] \n" -void ScaleARGBFilterCols_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - const uint8* src_tmp = src_argb; + const uint8_t* src_tmp = src_argb; asm volatile ( "vdup.32 q0, %3 \n" // x "vdup.32 q1, %4 \n" // dx diff --git a/chromium/third_party/libyuv/source/scale_neon64.cc b/chromium/third_party/libyuv/source/scale_neon64.cc index 73aed9e1b38..494a9cfbfbe 100644 --- a/chromium/third_party/libyuv/source/scale_neon64.cc +++ b/chromium/third_party/libyuv/source/scale_neon64.cc @@ -21,9 +21,9 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) // Read 32x1 throw away even pixels, and write 16x1. -void ScaleRowDown2_NEON(const uint8* src_ptr, +void ScaleRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( @@ -42,9 +42,9 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, } // Read 32x1 average down and write 16x1. 
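// The "average down" is a rounding average over adjacent source pairs:
// each output is (a + b + 1) >> 1. A minimal scalar sketch (names
// illustrative):
static void ScaleRowDown2Linear_Sketch_C(const uint8_t* src_ptr,
                                         uint8_t* dst,
                                         int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst[x] = (uint8_t)((src_ptr[0] + src_ptr[1] + 1) >> 1);  // rounds ties up
    src_ptr += 2;
  }
}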
-void ScaleRowDown2Linear_NEON(const uint8* src_ptr, +void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( @@ -64,9 +64,9 @@ void ScaleRowDown2Linear_NEON(const uint8* src_ptr, } // Read 32x2 average down and write 16x1. -void ScaleRowDown2Box_NEON(const uint8* src_ptr, +void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer @@ -92,9 +92,9 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ); } -void ScaleRowDown4_NEON(const uint8* src_ptr, +void ScaleRowDown4_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( @@ -110,13 +110,13 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, : "v0", "v1", "v2", "v3", "memory", "cc"); } -void ScaleRowDown4Box_NEON(const uint8* src_ptr, +void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride; - const uint8* src_ptr2 = src_ptr + src_stride * 2; - const uint8* src_ptr3 = src_ptr + src_stride * 3; + const uint8_t* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr2 = src_ptr + src_stride * 2; + const uint8_t* src_ptr3 = src_ptr + src_stride * 3; asm volatile( "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load up 16x4 @@ -145,9 +145,9 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. -void ScaleRowDown34_NEON(const uint8* src_ptr, +void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( @@ -164,9 +164,9 @@ void ScaleRowDown34_NEON(const uint8* src_ptr, : "v0", "v1", "v2", "v3", "memory", "cc"); } -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "movi v20.8b, #3 \n" @@ -221,9 +221,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, "v19", "v20", "memory", "cc"); } -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { asm volatile( "movi v20.8b, #3 \n" @@ -273,9 +273,9 @@ static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18}; // 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, +void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( @@ -295,11 +295,11 @@ void ScaleRowDown38_NEON(const uint8* src_ptr, } // 32x3 -> 12x1 -void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, +void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride * 2; + const uint8_t* src_ptr1 = src_ptr + src_stride * 2; ptrdiff_t tmp_src_stride = src_stride; asm volatile( @@ -415,9 +415,9 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, } // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, +void 
ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { // TODO(fbarchard): use src_stride directly for clang 3.5+. ptrdiff_t tmp_src_stride = src_stride; @@ -515,12 +515,12 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, "v19", "v30", "v31", "memory", "cc"); } -void ScaleAddRows_NEON(const uint8* src_ptr, +void ScaleAddRows_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, + uint16_t* dst_ptr, int src_width, int src_height) { - const uint8* src_tmp; + const uint8_t* src_tmp; asm volatile( "1: \n" "mov %0, %1 \n" @@ -558,19 +558,19 @@ void ScaleAddRows_NEON(const uint8* src_ptr, "ld2 {v4.b, v5.b}[" #n "], [%6] \n" // The NEON version mimics this formula (from row_common.cc): -// #define BLENDER(a, b, f) (uint8)((int)(a) + +// #define BLENDER(a, b, f) (uint8_t)((int)(a) + // ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) -void ScaleFilterCols_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterCols_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - const uint8* src_tmp = src_ptr; - int64 x64 = (int64)x; // NOLINT - int64 dx64 = (int64)dx; // NOLINT + const uint8_t* src_tmp = src_ptr; + int64_t x64 = (int64_t)x; // NOLINT + int64_t dx64 = (int64_t)dx; // NOLINT asm volatile ( "dup v0.4s, %w3 \n" // x "dup v1.4s, %w4 \n" // dx @@ -628,8 +628,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, #undef LOAD2_DATA8_LANE // 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8* dst_ptr, - const uint8* src_ptr, +void ScaleFilterRows_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -713,9 +713,9 @@ void ScaleFilterRows_NEON(uint8* dst_ptr, : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"); } -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( @@ -734,9 +734,9 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ); } -void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, +void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( @@ -757,9 +757,9 @@ void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ); } -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, +void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst, + uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer @@ -792,10 +792,10 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, +void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( @@ -807,10 +807,10 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, "subs %w2, %w2, #4 \n" // 4 pixels per loop. 
"st1 {v0.16b}, [%1], #16 \n" "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"((int64)(src_stepx * 4)) // %3 + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : "r"((int64_t)(src_stepx * 4)) // %3 : "memory", "cc", "v0"); } @@ -818,10 +818,10 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, // Alignment requirement: src_argb 4 byte aligned. // TODO(Yang Zhang): Might be worth another optimization pass in future. // It could be upgraded to 8 pixels at a time to start with. -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { asm volatile( "add %1, %1, %0 \n" @@ -851,11 +851,11 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, "subs %w3, %w3, #4 \n" // 4 pixels per loop. "st1 {v0.16b}, [%2], #16 \n" "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stride), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width) // %3 - : "r"((int64)(src_stepx * 4)) // %4 + : "+r"(src_argb), // %0 + "+r"(src_stride), // %1 + "+r"(dst_argb), // %2 + "+r"(dst_width) // %3 + : "r"((int64_t)(src_stepx * 4)) // %4 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } @@ -867,15 +867,15 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, "add %3, %3, %4 \n" \ "ld1 {" #vn ".s}[" #n "], [%6] \n" -void ScaleARGBCols_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { - const uint8* src_tmp = src_argb; - int64 x64 = (int64)x; // NOLINT - int64 dx64 = (int64)dx; // NOLINT - int64 tmp64; + const uint8_t* src_tmp = src_argb; + int64_t x64 = (int64_t)x; // NOLINT + int64_t dx64 = (int64_t)dx; // NOLINT + int64_t tmp64; asm volatile( "1: \n" // clang-format off @@ -912,16 +912,16 @@ void ScaleARGBCols_NEON(uint8* dst_argb, "add %3, %3, %4 \n" \ "ld2 {" #vn1 ".s, " #vn2 ".s}[" #n "], [%6] \n" -void ScaleARGBFilterCols_NEON(uint8* dst_argb, - const uint8* src_argb, +void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - const uint8* src_tmp = src_argb; - int64 x64 = (int64)x; // NOLINT - int64 dx64 = (int64)dx; // NOLINT + const uint8_t* src_tmp = src_argb; + int64_t x64 = (int64_t)x; // NOLINT + int64_t dx64 = (int64_t)dx; // NOLINT asm volatile ( "dup v0.4s, %w3 \n" // x "dup v1.4s, %w4 \n" // dx @@ -978,9 +978,9 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb, #undef LOAD2_DATA32_LANE // Read 16x2 average down and write 8x1. -void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, +void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer @@ -1008,9 +1008,9 @@ void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, // Read 8x2 upsample with filtering and write 16x1. // Actually reads an extra pixel, so 9x2. 
-void ScaleRowUp2_16_NEON(const uint16* src_ptr, +void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width) { asm volatile( "add %1, %0, %1, lsl #1 \n" // ptr + stide * 2 diff --git a/chromium/third_party/libyuv/source/scale_win.cc b/chromium/third_party/libyuv/source/scale_win.cc index b33881998aa..c5fc86f3e96 100644 --- a/chromium/third_party/libyuv/source/scale_win.cc +++ b/chromium/third_party/libyuv/source/scale_win.cc @@ -89,9 +89,9 @@ static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0}; // Reads 32 pixels, throws half away and writes 16 pixels. -__declspec(naked) void ScaleRowDown2_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -116,9 +116,9 @@ __declspec(naked) void ScaleRowDown2_SSSE3(const uint8* src_ptr, } // Blends 32x1 rectangle to 16x1. -__declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -150,9 +150,9 @@ __declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, } // Blends 32x2 rectangle to 16x1. -__declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -195,9 +195,9 @@ __declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, #ifdef HAS_SCALEROWDOWN2_AVX2 // Reads 64 pixels, throws half away and writes 32 pixels. -__declspec(naked) void ScaleRowDown2_AVX2(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown2_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -224,9 +224,9 @@ __declspec(naked) void ScaleRowDown2_AVX2(const uint8* src_ptr, } // Blends 64x1 rectangle to 32x1. -__declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -262,9 +262,9 @@ __declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, // For rounding, average = (sum + 2) / 4 // becomes average((sum >> 1), 0) // Blends 64x2 rectangle to 32x1. -__declspec(naked) void ScaleRowDown2Box_AVX2(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -309,9 +309,9 @@ __declspec(naked) void ScaleRowDown2Box_AVX2(const uint8* src_ptr, #endif // HAS_SCALEROWDOWN2_AVX2 // Point samples 32 pixels to 8 pixels. -__declspec(naked) void ScaleRowDown4_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -341,9 +341,9 @@ __declspec(naked) void ScaleRowDown4_SSSE3(const uint8* src_ptr, } // Blends 32x4 rectangle to 8x1. 
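// Scalar equivalent of the 32x4 -> 8x1 box blend: each output byte is the
// rounded mean of a 4x4 block, i.e. (sum + 8) >> 4. A minimal sketch
// (names illustrative):
static void ScaleRowDown4Box_Sketch_C(const uint8_t* src_ptr,
                                      ptrdiff_t src_stride,
                                      uint8_t* dst_ptr,
                                      int dst_width) {
  int x, r, c;
  for (x = 0; x < dst_width; ++x) {
    unsigned int sum = 0;
    for (r = 0; r < 4; ++r) {
      for (c = 0; c < 4; ++c) {
        sum += src_ptr[r * src_stride + x * 4 + c];
      }
    }
    dst_ptr[x] = (uint8_t)((sum + 8) >> 4);  // /16, rounded to nearest
  }
}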
-__declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -400,9 +400,9 @@ __declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, #ifdef HAS_SCALEROWDOWN4_AVX2 // Point samples 64 pixels to 16 pixels. -__declspec(naked) void ScaleRowDown4_AVX2(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown4_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -435,9 +435,9 @@ __declspec(naked) void ScaleRowDown4_AVX2(const uint8* src_ptr, } // Blends 64x4 rectangle to 16x1. -__declspec(naked) void ScaleRowDown4Box_AVX2(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -499,9 +499,9 @@ __declspec(naked) void ScaleRowDown4Box_AVX2(const uint8* src_ptr, // Produces three 8 byte values. For each 8 bytes, 16 bytes are read. // Then shuffled to do the scaling. -__declspec(naked) void ScaleRowDown34_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -547,9 +547,9 @@ __declspec(naked) void ScaleRowDown34_SSSE3(const uint8* src_ptr, // xmm7 kRound34 // Note that movdqa+palign may be better than movdqu. -__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -604,9 +604,9 @@ __declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, } // Note that movdqa+palign may be better than movdqu. -__declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -666,9 +666,9 @@ __declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, // 3/8 point sampler // Scale 32 pixels to 12 -__declspec(naked) void ScaleRowDown38_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -698,9 +698,9 @@ __declspec(naked) void ScaleRowDown38_SSSE3(const uint8* src_ptr, } // Scale 16x3 pixels to 6x1 with interpolation -__declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -763,9 +763,9 @@ __declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, } // Scale 16x2 pixels to 6x1 with interpolation -__declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, +__declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, + uint8_t* dst_ptr, int dst_width) { __asm { push esi @@ -808,8 +808,8 @@ __declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, } // Reads 16 bytes and accumulates to 16 shorts at a time. 
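// The accumulate step is simply dst[x] += src[x] into 16-bit sums; the
// box filter calls it once per source row and divides at the end. Scalar
// form, mirroring ScaleAddRow_C:
static void ScaleAddRow_Sketch_C(const uint8_t* src_ptr,
                                 uint16_t* dst_ptr,
                                 int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] += src_ptr[x];  // widen 8-bit pixels into running 16-bit sums
  }
}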
-__declspec(naked) void ScaleAddRow_SSE2(const uint8* src_ptr, - uint16* dst_ptr, +__declspec(naked) void ScaleAddRow_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, int src_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -839,8 +839,8 @@ __declspec(naked) void ScaleAddRow_SSE2(const uint8* src_ptr, #ifdef HAS_SCALEADDROW_AVX2 // Reads 32 bytes and accumulates to 32 shorts at a time. -__declspec(naked) void ScaleAddRow_AVX2(const uint8* src_ptr, - uint16* dst_ptr, +__declspec(naked) void ScaleAddRow_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, int src_width) { __asm { mov eax, [esp + 4] // src_ptr @@ -879,8 +879,8 @@ static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. -__declspec(naked) void ScaleFilterCols_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, +__declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { @@ -965,8 +965,8 @@ __declspec(naked) void ScaleFilterCols_SSSE3(uint8* dst_ptr, } // Reads 16 pixels, duplicates them and writes 32 pixels. -__declspec(naked) void ScaleColsUp2_SSE2(uint8* dst_ptr, - const uint8* src_ptr, +__declspec(naked) void ScaleColsUp2_SSE2(uint8_t* dst_ptr, + const uint8_t* src_ptr, int dst_width, int x, int dx) { @@ -992,9 +992,9 @@ __declspec(naked) void ScaleColsUp2_SSE2(uint8* dst_ptr, } // Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6) -__declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8* src_argb, +__declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { __asm { mov eax, [esp + 4] // src_argb @@ -1017,9 +1017,9 @@ __declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8* src_argb, } // Blends 8x1 rectangle to 4x1. -__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, +__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { __asm { mov eax, [esp + 4] // src_argb @@ -1045,9 +1045,9 @@ __declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, } // Blends 8x2 rectangle to 4x1. -__declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, +__declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { __asm { push esi @@ -1079,10 +1079,10 @@ __declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, } // Reads 4 pixels at a time. -__declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, +__declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { __asm { push ebx @@ -1116,10 +1116,10 @@ __declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, } // Blends four 2x2 to 4x1. -__declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, +__declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, + uint8_t* dst_argb, int dst_width) { __asm { push ebx @@ -1164,8 +1164,8 @@ __declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, } // Column scaling unfiltered. SSE2 version. 
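// x and dx are 16.16 fixed point: the integer source column in the top 16
// bits, the fraction below. Unfiltered ARGB column scaling point-samples
// whole 4-byte pixels. A minimal scalar sketch (assumes src_argb is
// 4-byte aligned):
static void ScaleARGBCols_Sketch_C(uint8_t* dst_argb,
                                   const uint8_t* src_argb,
                                   int dst_width,
                                   int x,
                                   int dx) {
  const uint32_t* src = (const uint32_t*)src_argb;  // 1 ARGB pixel = 4 bytes
  uint32_t* dst = (uint32_t*)dst_argb;
  int j;
  for (j = 0; j < dst_width; ++j) {
    dst[j] = src[x >> 16];  // integer part selects the source pixel
    x += dx;                // step in 16.16 fixed point
  }
}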
-__declspec(naked) void ScaleARGBCols_SSE2(uint8* dst_argb, - const uint8* src_argb, +__declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { @@ -1257,8 +1257,8 @@ static const uvec8 kShuffleFractions = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, }; -__declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, - const uint8* src_argb, +__declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { @@ -1330,8 +1330,8 @@ __declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, } // Reads 4 pixels, duplicates them and writes 8 pixels. -__declspec(naked) void ScaleARGBColsUp2_SSE2(uint8* dst_argb, - const uint8* src_argb, +__declspec(naked) void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, int dst_width, int x, int dx) { diff --git a/chromium/third_party/libyuv/source/video_common.cc b/chromium/third_party/libyuv/source/video_common.cc index 3e9c6a29502..92384c050cd 100644 --- a/chromium/third_party/libyuv/source/video_common.cc +++ b/chromium/third_party/libyuv/source/video_common.cc @@ -15,14 +15,13 @@ namespace libyuv { extern "C" { #endif -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0])) - struct FourCCAliasEntry { - uint32 alias; - uint32 canonical; + uint32_t alias; + uint32_t canonical; }; -static const struct FourCCAliasEntry kFourCCAliases[] = { +#define NUM_ALIASES 18 +static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = { {FOURCC_IYUV, FOURCC_I420}, {FOURCC_YU12, FOURCC_I420}, {FOURCC_YU16, FOURCC_I422}, @@ -46,9 +45,9 @@ static const struct FourCCAliasEntry kFourCCAliases[] = { // {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA LIBYUV_API -uint32 CanonicalFourCC(uint32 fourcc) { +uint32_t CanonicalFourCC(uint32_t fourcc) { int i; - for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) { + for (i = 0; i < NUM_ALIASES; ++i) { if (kFourCCAliases[i].alias == fourcc) { return kFourCCAliases[i].canonical; } diff --git a/chromium/third_party/libyuv/unit_test/basictypes_test.cc b/chromium/third_party/libyuv/unit_test/basictypes_test.cc index 89f7644d58e..9aaa2dcd989 100644 --- a/chromium/third_party/libyuv/unit_test/basictypes_test.cc +++ b/chromium/third_party/libyuv/unit_test/basictypes_test.cc @@ -13,25 +13,15 @@ namespace libyuv { -TEST_F(LibYUVBaseTest, Endian) { - uint16 v16 = 0x1234u; - uint8 first_byte = *reinterpret_cast<uint8*>(&v16); -#if defined(LIBYUV_LITTLE_ENDIAN) - EXPECT_EQ(0x34u, first_byte); -#else - EXPECT_EQ(0x12u, first_byte); -#endif -} - TEST_F(LibYUVBaseTest, SizeOfTypes) { - int8 i8 = -1; - uint8 u8 = 1u; - int16 i16 = -1; - uint16 u16 = 1u; - int32 i32 = -1; - uint32 u32 = 1u; - int64 i64 = -1; - uint64 u64 = 1u; + int8_t i8 = -1; + uint8_t u8 = 1u; + int16_t i16 = -1; + uint16_t u16 = 1u; + int32_t i32 = -1; + uint32_t u32 = 1u; + int64_t i64 = -1; + uint64_t u64 = 1u; EXPECT_EQ(1u, sizeof(i8)); EXPECT_EQ(1u, sizeof(u8)); EXPECT_EQ(2u, sizeof(i16)); @@ -50,11 +40,4 @@ TEST_F(LibYUVBaseTest, SizeOfTypes) { EXPECT_LT(0u, u64); } -TEST_F(LibYUVBaseTest, SizeOfConstants) { - EXPECT_EQ(8u, sizeof(INT64_C(0))); - EXPECT_EQ(8u, sizeof(UINT64_C(0))); - EXPECT_EQ(8u, sizeof(INT64_C(0x1234567887654321))); - EXPECT_EQ(8u, sizeof(UINT64_C(0x8765432112345678))); -} - } // namespace libyuv diff --git a/chromium/third_party/libyuv/unit_test/color_test.cc b/chromium/third_party/libyuv/unit_test/color_test.cc index 30b6411283f..4bb448d56fe 100644 --- 
a/chromium/third_party/libyuv/unit_test/color_test.cc +++ b/chromium/third_party/libyuv/unit_test/color_test.cc @@ -63,10 +63,10 @@ namespace libyuv { \ /* The test is overall for color conversion matrix being reversible, so */ \ /* this initializes the pixel with 2x2 blocks to eliminate subsampling. */ \ - uint8* p = orig_y; \ + uint8_t* p = orig_y; \ for (int y = 0; y < benchmark_height_ - HS1; y += HS) { \ for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p[1] = r; \ p[HN] = r; \ @@ -74,7 +74,7 @@ namespace libyuv { p += 2; \ } \ if (benchmark_width_ & 1) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p[HN] = r; \ p += 1; \ @@ -83,13 +83,13 @@ namespace libyuv { } \ if ((benchmark_height_ & 1) && HS == 2) { \ for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p[1] = r; \ p += 2; \ } \ if (benchmark_width_ & 1) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p += 1; \ } \ @@ -147,10 +147,10 @@ static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) { const int kPixels = kWidth * kHeight; const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_u[8]); - SIMD_ALIGNED(uint8 orig_v[8]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); memset(orig_y, y, kPixels); memset(orig_u, u, kHalfPixels); memset(orig_v, v, kHalfPixels); @@ -170,10 +170,10 @@ static void YUVJToRGB(int y, int u, int v, int* r, int* g, int* b) { const int kPixels = kWidth * kHeight; const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_u[8]); - SIMD_ALIGNED(uint8 orig_v[8]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); memset(orig_y, y, kPixels); memset(orig_u, u, kHalfPixels); memset(orig_v, v, kHalfPixels); @@ -192,8 +192,8 @@ static void YToRGB(int y, int* r, int* g, int* b) { const int kHeight = 1; const int kPixels = kWidth * kHeight; - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); memset(orig_y, y, kPixels); /* YUV converted to ARGB. */ @@ -209,8 +209,8 @@ static void YJToRGB(int y, int* r, int* g, int* b) { const int kHeight = 1; const int kPixels = kWidth * kHeight; - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); memset(orig_y, y, kPixels); /* YUV converted to ARGB. */ diff --git a/chromium/third_party/libyuv/unit_test/compare_test.cc b/chromium/third_party/libyuv/unit_test/compare_test.cc index 1c6d988ef2c..136254e169b 100644 --- a/chromium/third_party/libyuv/unit_test/compare_test.cc +++ b/chromium/third_party/libyuv/unit_test/compare_test.cc @@ -22,8 +22,10 @@ namespace libyuv { // hash seed of 5381 recommended. 
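// Because djb2 folds strictly left to right (hash = hash * 33 + byte),
// large inputs can be hashed incrementally by feeding each partial result
// back in as the seed. A usage sketch with HashDjb2 from libyuv/compare.h
// (buf/len1/len2 are illustrative):
static uint32_t HashDjb2InChunks(const uint8_t* buf,
                                 uint64_t len1,
                                 uint64_t len2) {
  uint32_t h = HashDjb2(buf, len1, 5381);  // first chunk, recommended seed
  return HashDjb2(buf + len1, len2, h);    // == HashDjb2(buf, len1 + len2, 5381)
}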
-static uint32 ReferenceHashDjb2(const uint8* src, uint64 count, uint32 seed) { - uint32 hash = seed; +static uint32_t ReferenceHashDjb2(const uint8_t* src, + uint64_t count, + uint32_t seed) { + uint32_t hash = seed; if (count > 0) { do { hash = hash * 33 + *src++; @@ -41,8 +43,8 @@ TEST_F(LibYUVCompareTest, Djb2_Test) { "The quick brown fox jumps over the lazy dog" " and feels as if he were in the seventh heaven of typography" " together with Hermann Zapf"; - uint32 foxhash = HashDjb2(reinterpret_cast<const uint8*>(fox), 131, 5381); - const uint32 kExpectedFoxHash = 2611006483u; + uint32_t foxhash = HashDjb2(reinterpret_cast<const uint8_t*>(fox), 131, 5381); + const uint32_t kExpectedFoxHash = 2611006483u; EXPECT_EQ(kExpectedFoxHash, foxhash); for (int i = 0; i < kMaxTest; ++i) { @@ -50,8 +52,8 @@ TEST_F(LibYUVCompareTest, Djb2_Test) { src_b[i] = (fastrand() & 0xff); } // Compare different buffers. Expect hash is different. - uint32 h1 = HashDjb2(src_a, kMaxTest, 5381); - uint32 h2 = HashDjb2(src_b, kMaxTest, 5381); + uint32_t h1 = HashDjb2(src_a, kMaxTest, 5381); + uint32_t h2 = HashDjb2(src_b, kMaxTest, 5381); EXPECT_NE(h1, h2); // Make last half same. Expect hash is different. @@ -124,8 +126,8 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Opt) { for (int i = 0; i < kMaxTest; ++i) { src_a[i] = i; } - uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381); - uint32 h1; + uint32_t h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381); + uint32_t h1; for (int i = 0; i < benchmark_iterations_; ++i) { h1 = HashDjb2(src_a, kMaxTest, 5381); } @@ -139,8 +141,8 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) { for (int i = 0; i < kMaxTest; ++i) { src_a[i + 1] = i; } - uint32 h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381); - uint32 h1; + uint32_t h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381); + uint32_t h1; for (int i = 0; i < benchmark_iterations_; ++i) { h1 = HashDjb2(src_a + 1, kMaxTest, 5381); } @@ -149,7 +151,7 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) { } TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) { - uint32 fourcc; + uint32_t fourcc; const int kMaxTest = benchmark_width_ * benchmark_height_ * 4; align_buffer_page_end(src_a, kMaxTest); for (int i = 0; i < kMaxTest; ++i) { @@ -159,12 +161,12 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) { src_a[0] = 0; fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc); src_a[0] = 255; src_a[3] = 0; fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc); src_a[3] = 255; for (int i = 0; i < benchmark_iterations_; ++i) { @@ -177,7 +179,7 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) { } TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) { - uint32 fourcc; + uint32_t fourcc; const int kMaxTest = benchmark_width_ * benchmark_height_ * 4 + 1; align_buffer_page_end(src_a, kMaxTest); for (int i = 1; i < kMaxTest; ++i) { @@ -187,12 +189,12 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) { src_a[0 + 1] = 0; fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc); src_a[0 + 1] = 255; src_a[3 + 1] = 0; fourcc = ARGBDetect(src_a + 1, 
benchmark_width_ * 4, benchmark_width_, benchmark_height_); - EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc); src_a[3 + 1] = 255; for (int i = 0; i < benchmark_iterations_; ++i) { @@ -214,14 +216,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) { // Test known value memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); - uint32 h1 = HammingDistance_C(src_a, src_b, 16); + uint32_t h1 = HammingDistance_C(src_a, src_b, 16); EXPECT_EQ(16u, h1); // Test C vs OPT on random buffer MemRandomize(src_a, kMaxWidth); MemRandomize(src_b, kMaxWidth); - uint32 h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); int count = benchmark_iterations_ * @@ -273,14 +275,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) { // Test known value memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); - uint32 h1 = HammingDistance_C(src_a, src_b, 16); + uint32_t h1 = HammingDistance_C(src_a, src_b, 16); EXPECT_EQ(16u, h1); // Test C vs OPT on random buffer MemRandomize(src_a, kMaxWidth); MemRandomize(src_b, kMaxWidth); - uint32 h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); int count = benchmark_iterations_ * @@ -304,14 +306,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) { memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); - uint64 h1 = ComputeHammingDistance(src_a, src_b, 16); + uint64_t h1 = ComputeHammingDistance(src_a, src_b, 16); EXPECT_EQ(16u, h1); // Test C vs OPT on random buffer MemRandomize(src_a, kMaxWidth); MemRandomize(src_b, kMaxWidth); - uint32 h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); int count = benchmark_iterations_ * @@ -337,14 +339,14 @@ static const int kMaxOptCount = (1 << (32 - 3)) - 64; // 536870848 #endif TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { - uint32 h1 = 0; + uint32_t h1 = 0; const int kMaxWidth = (benchmark_width_ * benchmark_height_ + 31) & ~31; align_buffer_page_end(src_a, kMaxWidth); align_buffer_page_end(src_b, kMaxWidth); memset(src_a, 255u, kMaxWidth); memset(src_b, 0u, kMaxWidth); - uint64 h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth); + uint64_t h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth); EXPECT_EQ(kMaxWidth * 8ULL, h0); for (int i = 0; i < benchmark_iterations_; ++i) { @@ -385,7 +387,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { if (kMaxWidth <= kMaxOptCount) { EXPECT_EQ(kMaxWidth * 8U, h1); } else { - if (kMaxWidth * 8ULL != static_cast<uint64>(h1)) { + if (kMaxWidth * 8ULL != static_cast<uint64_t>(h1)) { printf( "warning - HammingDistance_Opt %u does not match %llu " "but length of %u is longer than guaranteed.\n", @@ -408,7 +410,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance) { memset(src_a, 255u, benchmark_width_ * benchmark_height_); memset(src_b, 0, benchmark_width_ * benchmark_height_); - uint64 h1 = 0; + uint64_t h1 = 0; for (int i = 0; i < benchmark_iterations_; ++i) { h1 = ComputeHammingDistance(src_a, src_b, benchmark_width_ * benchmark_height_); @@ -428,7 +430,7 @@ TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) { memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); - uint64 h1 = ComputeSumSquareError(src_a, src_b, 16); + uint64_t h1 = ComputeSumSquareError(src_a, src_b, 16); EXPECT_EQ(790u, h1); for (int i = 0; i < kMaxWidth; 
++i) { @@ -458,7 +460,7 @@ TEST_F(LibYUVCompareTest, SumSquareError) { memset(src_a, 0, kMaxWidth); memset(src_b, 0, kMaxWidth); - uint64 err; + uint64_t err; err = ComputeSumSquareError(src_a, src_b, kMaxWidth); EXPECT_EQ(0u, err); @@ -480,10 +482,10 @@ TEST_F(LibYUVCompareTest, SumSquareError) { } MaskCpuFlags(disable_cpu_flags_); - uint64 c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + uint64_t c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); MaskCpuFlags(benchmark_cpu_info_); - uint64 opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + uint64_t opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); EXPECT_EQ(c_err, opt_err); @@ -502,9 +504,10 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Opt) { MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); - for (int i = 0; i < benchmark_iterations_; ++i) + for (int i = 0; i < benchmark_iterations_; ++i) { CalcFramePsnr(src_a, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); + } opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); @@ -526,9 +529,10 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Unaligned) { MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); - for (int i = 0; i < benchmark_iterations_; ++i) + for (int i = 0; i < benchmark_iterations_; ++i) { CalcFramePsnr(src_a + 1, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); + } opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); @@ -627,9 +631,10 @@ TEST_F(LibYUVCompareTest, DISABLED_BenchmarkSsim_Opt) { MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); - for (int i = 0; i < benchmark_iterations_; ++i) + for (int i = 0; i < benchmark_iterations_; ++i) { CalcFrameSsim(src_a, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); + } opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkSsim_Opt - %8.2f us opt\n", opt_time * 1e6); diff --git a/chromium/third_party/libyuv/unit_test/convert_test.cc b/chromium/third_party/libyuv/unit_test/convert_test.cc index 7d196a1d8e0..750bd871992 100644 --- a/chromium/third_party/libyuv/unit_test/convert_test.cc +++ b/chromium/third_party/libyuv/unit_test/convert_test.cc @@ -41,6 +41,7 @@ namespace libyuv { // Alias to copy pixels as is #define AR30ToAR30 ARGBCopy +#define ABGRToABGR ARGBCopy #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) @@ -136,20 +137,20 @@ namespace libyuv { FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ benchmark_width_, _Opt, +, 0) -TESTPLANARTOP(I420, uint8, 1, 2, 2, I420, uint8, 1, 2, 2) -TESTPLANARTOP(I422, uint8, 1, 2, 1, I420, uint8, 1, 2, 2) -TESTPLANARTOP(I444, uint8, 1, 1, 1, I420, uint8, 1, 2, 2) -TESTPLANARTOP(I420, uint8, 1, 2, 2, I422, uint8, 1, 2, 1) -TESTPLANARTOP(I420, uint8, 1, 2, 2, I444, uint8, 1, 1, 1) -TESTPLANARTOP(I420, uint8, 1, 2, 2, I420Mirror, uint8, 1, 2, 2) -TESTPLANARTOP(I422, uint8, 1, 2, 1, I422, uint8, 1, 2, 1) -TESTPLANARTOP(I444, uint8, 1, 1, 1, I444, uint8, 1, 1, 1) -TESTPLANARTOP(I010, uint16, 2, 2, 2, I010, uint16, 2, 2, 2) -TESTPLANARTOP(I010, uint16, 2, 2, 2, I420, uint8, 1, 2, 2) -TESTPLANARTOP(I420, uint8, 1, 2, 2, I010, uint16, 2, 2, 2) -TESTPLANARTOP(H010, uint16, 2, 2, 2, H010, uint16, 2, 2, 2) -TESTPLANARTOP(H010, uint16, 2, 2, 2, H420, uint8, 1, 2, 2) -TESTPLANARTOP(H420, uint8, 1, 2, 2, H010, uint16, 2, 2, 2) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 
2, 2) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2) // Test Android 420 to I420 #define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ @@ -173,8 +174,8 @@ TESTPLANARTOP(H420, uint8, 1, 2, 2, H010, uint16, 2, 2, 2) SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - uint8* src_u = src_uv + OFF_U; \ - uint8* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \ + uint8_t* src_u = src_uv + OFF_U; \ + uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \ int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kWidth; ++j) \ @@ -594,6 +595,7 @@ TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4) TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4) TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1, 0, ARGB, 4) +TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1, 0, ARGB, 4) // TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1, 0, ABGR, 4) #define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ @@ -1064,6 +1066,7 @@ TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) +TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) @@ -1080,6 +1083,7 @@ TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0) TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0) @@ -1238,8 +1242,8 @@ TESTSYM(BGRAToARGB, 4, 4, 1) TESTSYM(ABGRToARGB, 4, 4, 1) TEST_F(LibYUVConvertTest, Test565) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 pixels565[256][2]); + SIMD_ALIGNED(uint8_t orig_pixels[256][4]); + SIMD_ALIGNED(uint8_t pixels565[256][2]); for (int i = 0; i < 256; ++i) { for (int j = 0; j < 4; ++j) { @@ -1247,7 +1251,7 @@ TEST_F(LibYUVConvertTest, Test565) { } } ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); - uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); + uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); EXPECT_EQ(610919429u, checksum); } @@ -1442,7 +1446,7 @@ TEST_F(LibYUVConvertTest, NV12Crop) { const int sample_size = kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; align_buffer_page_end(src_y, sample_size); - uint8* src_uv = 
src_y + kWidth * kHeight; + uint8_t* src_uv = src_y + kWidth * kHeight; align_buffer_page_end(dst_y, kDestWidth * kDestHeight); align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * @@ -1510,13 +1514,13 @@ TEST_F(LibYUVConvertTest, NV12Crop) { } TEST_F(LibYUVConvertTest, TestYToARGB) { - uint8 y[32]; - uint8 expectedg[32]; + uint8_t y[32]; + uint8_t expectedg[32]; for (int i = 0; i < 32; ++i) { y[i] = i * 5 + 17; expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f); } - uint8 argb[32 * 4]; + uint8_t argb[32 * 4]; YToARGB(y, 0, argb, 0, 32, 1); for (int i = 0; i < 32; ++i) { @@ -1528,7 +1532,7 @@ TEST_F(LibYUVConvertTest, TestYToARGB) { } } -static const uint8 kNoDither4x4[16] = { +static const uint8_t kNoDither4x4[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -1555,7 +1559,7 @@ TEST_F(LibYUVConvertTest, TestNoDither) { } // Ordered 4x4 dither for 888 to 565. Values from 0 to 7. -static const uint8 kDither565_4x4[16] = { +static const uint8_t kDither565_4x4[16] = { 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; @@ -1943,8 +1947,9 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) // Caveat: Destination needs to be 4 bytes TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4) - -// TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ABGR, 4) +TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4) TEST_F(LibYUVConvertTest, RotateWithARGBSource) { // 2x2 frames @@ -2015,79 +2020,113 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { } #endif // HAS_ARGBTOAR30ROW_AVX2 -#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF, \ - FMT_C, BPP_C) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - const int kBpc = 2; \ - align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ - align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \ - align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - reinterpret_cast<uint16*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - reinterpret_cast<uint16*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \ - reinterpret_cast<uint16*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \ - } \ - memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16*>(src_y + SOFF), kWidth, \ - reinterpret_cast<uint16*>(src_u + SOFF), kStrideUV, \ - reinterpret_cast<uint16*>(src_v + SOFF), kStrideUV, \ - dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B( \ - reinterpret_cast<uint16*>(src_y + SOFF), kWidth, \ - reinterpret_cast<uint16*>(src_u + SOFF), kStrideUV, \ - reinterpret_cast<uint16*>(src_v + SOFF), kStrideUV, \ - dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ - int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) - \ - static_cast<int>(dst_argb_opt[i + DOFF])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ +#ifdef HAS_ABGRTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { + // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels. + const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + align_buffer_page_end(src, kPixels * 4); + align_buffer_page_end(dst_opt, kPixels * 4); + align_buffer_page_end(dst_c, kPixels * 4); + MemRandomize(src, kPixels * 4); + memset(dst_opt, 0, kPixels * 4); + memset(dst_c, 1, kPixels * 4); + + ABGRToAR30Row_C(src, dst_c, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + ABGRToAR30Row_AVX2(src, dst_opt, kPixels); + } else if (has_ssse3) { + ABGRToAR30Row_SSSE3(src, dst_opt, kPixels); + } else { + ABGRToAR30Row_C(src, dst_opt, kPixels); + } + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_opt[i], dst_c[i]); + } + + free_aligned_buffer_page_end(src); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(dst_c); +} +#endif // HAS_ABGRTOAR30ROW_AVX2 + +// TODO(fbarchard): Fix clamping issue affected by U channel. +#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \ + reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \ + } \ + memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ + dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ + dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) - \ + static_cast<int>(dst_argb_opt[i + DOFF])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ } #define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, DIFF, FMT_C, BPP_C) \ + YALIGN, DIFF) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0, FMT_C, \ - BPP_C) \ + YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 1, FMT_C, \ - BPP_C) \ + YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 1) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0, FMT_C, \ - BPP_C) \ + YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0, FMT_C, \ - BPP_C) + YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0) -TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2, AR30, 4) -TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2) +TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2) +TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2) +TESTPLANAR16TOB(I010, 2, 2, 
AB30, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 2) static int Clamp(int y) { if (y < 0) { @@ -2099,13 +2138,30 @@ static int Clamp(int y) { return y; } +static int Clamp10(int y) { + if (y < 0) { + y = 0; + } + if (y > 1023) { + y = 1023; + } + return y; +} + +// Test 8 bit YUV to 8 bit RGB TEST_F(LibYUVConvertTest, TestH420ToARGB) { const int kSize = 256; + int histogram_b[256]; + int histogram_g[256]; + int histogram_r[256]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); align_buffer_page_end(argb_pixels, kSize * 4); - uint8* orig_y = orig_yuv; - uint8* orig_u = orig_y + kSize; - uint8* orig_v = orig_u + kSize / 2; + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; // Test grey scale for (int i = 0; i < kSize; ++i) { @@ -2119,23 +2175,54 @@ TEST_F(LibYUVConvertTest, TestH420ToARGB) { H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f)); - EXPECT_NEAR(argb_pixels[i * 4 + 0], expected_y, 1); - EXPECT_NEAR(argb_pixels[i * 4 + 1], expected_y, 1); - EXPECT_NEAR(argb_pixels[i * 4 + 2], expected_y, 1); - EXPECT_EQ(argb_pixels[i * 4 + 3], 255); + EXPECT_NEAR(b, expected_y, 1); + EXPECT_NEAR(g, expected_y, 1); + EXPECT_NEAR(r, expected_y, 1); + EXPECT_EQ(a, 255); } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + free_aligned_buffer_page_end(orig_yuv); free_aligned_buffer_page_end(argb_pixels); } +// Test 10 bit YUV to 8 bit RGB TEST_F(LibYUVConvertTest, TestH010ToARGB) { const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); align_buffer_page_end(argb_pixels, kSize * 4); - uint16* orig_y = reinterpret_cast<uint16*>(orig_yuv); - uint16* orig_u = orig_y + kSize; - uint16* orig_v = orig_u + kSize / 2; + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; // Test grey scale for (int i = 0; i < kSize; ++i) { @@ -2149,14 +2236,226 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) { H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4)); - EXPECT_NEAR(argb_pixels[i * 4 + 0], expected_y, 1); - EXPECT_NEAR(argb_pixels[i * 4 + 1], expected_y, 1); - EXPECT_NEAR(argb_pixels[i * 4 + 2], 
expected_y, 1); - EXPECT_EQ(argb_pixels[i * 4 + 3], 255); + EXPECT_NEAR(b, expected_y, 1); + EXPECT_NEAR(g, expected_y, 1); + EXPECT_NEAR(r, expected_y, 1); + EXPECT_EQ(a, 255); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + free_aligned_buffer_page_end(orig_yuv); free_aligned_buffer_page_end(argb_pixels); } +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected result. +TEST_F(LibYUVConvertTest, TestH010ToAR30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. + orig_v[i] = 512; + } + + H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected result. +TEST_F(LibYUVConvertTest, TestH010ToAB30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ab30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int r10 = reinterpret_cast<uint32_t*>(ab30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 10) & 1023; + int b10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ab30_pixels); +} + +// Test 8 bit YUV to 10 bit RGB +TEST_F(LibYUVConvertTest, TestH420ToAR30) { + const int kSize = 256; + const int kHistSize = 1024; + int histogram_b[kHistSize]; + int histogram_g[kHistSize]; + int histogram_r[kHistSize]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 128; // 128 is 0. 
+ orig_v[i] = 128; + } + + H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kHistSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + } // namespace libyuv diff --git a/chromium/third_party/libyuv/unit_test/math_test.cc b/chromium/third_party/libyuv/unit_test/math_test.cc index 2b4b57b1cea..0abbad51321 100644 --- a/chromium/third_party/libyuv/unit_test/math_test.cc +++ b/chromium/third_party/libyuv/unit_test/math_test.cc @@ -65,8 +65,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv) { } EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); - MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); - MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div)); for (int j = 0; j < 1280; ++j) { if (div[j] == 0) { div[j] = 1280; @@ -90,8 +90,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv_Opt) { int result_opt[1280]; int result_c[1280]; - MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); - MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div)); for (int j = 0; j < 1280; ++j) { num[j] &= 4095; // Make numerator smaller. div[j] &= 4095; // Make divisor smaller. @@ -124,8 +124,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) { int result_opt[1280]; int result_c[1280]; - MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); - MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div)); for (int j = 0; j < 1280; ++j) { num[j] &= 4095; // Make numerator smaller. div[j] &= 4095; // Make divisor smaller. 
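The TestFixedDiv cases above pin down the contract of libyuv::FixedDiv: the return value is a 16.16 fixed-point quotient, so FixedDiv(123, 1) must equal 123 * 65536. A minimal reference sketch of that contract, for orientation only (FixedDivRef is a hypothetical name and the 64-bit widening is an assumption; the library ships optimized variants):

#include <cstdint>

// 16.16 fixed point: scale the numerator by 65536 (1 << 16), then divide.
// Widening to 64 bits keeps num << 16 from overflowing for int inputs.
static int FixedDivRef(int num, int div) {
  return static_cast<int>((static_cast<int64_t>(num) << 16) / div);
}

The _Opt tests above additionally mask both operands to 12 bits ("Make numerator smaller" / "Make divisor smaller") so intermediate values stay small while benchmarking.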
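The TestH010ToAR30, TestH010ToAB30 and TestH420ToAR30 cases earlier in convert_test.cc all decode the packed 2:10:10:10 word with the same masks and shifts. A compact sketch of that unpacking, plus the grey-ramp expectation the 10-bit tests compare against (the struct and helper names are illustrative, not part of libyuv's API):

#include <cstdint>

struct Rgb10 {
  int b, g, r, a;  // three 10-bit color channels plus 2-bit alpha
};

// AR30 word layout (little endian): B in bits 0-9, G in 10-19, R in 20-29,
// A in 30-31 -- the same shifts TestH010ToAR30 applies.
static Rgb10 UnpackAR30(uint32_t p) {
  return {static_cast<int>(p & 1023), static_cast<int>((p >> 10) & 1023),
          static_cast<int>((p >> 20) & 1023), static_cast<int>(p >> 30)};
}

// AB30 is the same layout with R and B swapped (R in bits 0-9, B in 20-29),
// matching the shifts in TestH010ToAB30.
static Rgb10 UnpackAB30(uint32_t p) {
  Rgb10 v = UnpackAR30(p);
  const int t = v.b;
  v.b = v.r;
  v.r = t;
  return v;
}

// Grey-ramp expectation for 10-bit input: expand studio-swing luma to full
// range, (Y - 64) * 1.164, clamped to [0, 1023] exactly as Clamp10 does.
static int ExpectedGrey10(int y10) {
  const int v = static_cast<int>((y10 - 64) * 1.164f);
  return v < 0 ? 0 : (v > 1023 ? 1023 : v);
}

With these helpers the per-pixel checks above reduce to EXPECT_NEAR(UnpackAR30(pix).g, ExpectedGrey10(y), 4) and EXPECT_EQ(UnpackAR30(pix).a, 3); the tolerance of 4 absorbs the float rounding that the tests' caveat comments mention.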
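A pattern note before the planar_test.cc hunks: ABGRToAR30Row_Opt above and the MergeUVRow_16_Opt / Convert16To8Row_Opt / Convert8To16Row_Opt tests below all exercise row kernels the same way: compute the C reference once, query the CPU flags once, run the widest available SIMD variant inside the timing loop, then require exact equality with the reference. A sketch of that dispatch under an assumed kernel shape (RunWidestVariant is hypothetical; TestCpuFlag and the kCpuHas* flags are the real libyuv APIs the tests call):

#include <cstdint>
#include "libyuv/cpu_id.h"  // TestCpuFlag, kCpuHasAVX2, kCpuHasSSSE3

// Any row kernel with the Convert16To8Row_* shape shown below.
typedef void (*RowFn)(const uint16_t* src, uint8_t* dst, int scale, int width);

static void RunWidestVariant(RowFn avx2, RowFn ssse3, RowFn c,
                             const uint16_t* src, uint8_t* dst,
                             int scale, int width) {
  // The tests hoist the flag queries out of the benchmark loop; keeping the
  // branch cheap means the loop times the kernel, not the dispatch.
  if (libyuv::TestCpuFlag(libyuv::kCpuHasAVX2)) {
    avx2(src, dst, scale, width);
  } else if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3)) {
    ssse3(src, dst, scale, width);
  } else {
    c(src, dst, scale, width);
  }
}

Note the bar is EXPECT_EQ here, not EXPECT_NEAR: the row tests hold SIMD paths to bit-exact agreement with C, whereas the full-image conversion tests above allow a small max_diff.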
diff --git a/chromium/third_party/libyuv/unit_test/planar_test.cc b/chromium/third_party/libyuv/unit_test/planar_test.cc index a499688feed..9f95941ce03 100644 --- a/chromium/third_party/libyuv/unit_test/planar_test.cc +++ b/chromium/third_party/libyuv/unit_test/planar_test.cc @@ -252,8 +252,8 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { } TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { - SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); - SIMD_ALIGNED(int32 added_pixels[16][16][4]); + SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]); + SIMD_ALIGNED(int32_t added_pixels[16][16][4]); for (int y = 0; y < 16; ++y) { for (int x = 0; x < 16; ++x) { @@ -278,7 +278,7 @@ TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { } TEST_F(LibYUVPlanarTest, TestARGBGray) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue @@ -349,8 +349,8 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) { } TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 gray_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t gray_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue @@ -421,7 +421,7 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { } TEST_F(LibYUVPlanarTest, TestARGBSepia) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue @@ -493,12 +493,12 @@ TEST_F(LibYUVPlanarTest, TestARGBSepia) { } TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); // Matrix for Sepia. - SIMD_ALIGNED(static const int8 kRGBToSepia[]) = { + SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = { 17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0, 24 / 2, 98 / 2, 50 / 2, 0, 0, 0, 0, 64, // Copy alpha. }; @@ -569,10 +569,10 @@ TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { } TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); // Matrix for Sepia. - SIMD_ALIGNED(static const int8 kRGBToSepia[]) = { + SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = { 17, 68, 35, 0, 22, 88, 45, 0, 24, 98, 50, 0, 0, 0, 0, 0, // Unused but makes matrix 16 bytes. }; @@ -629,11 +629,11 @@ TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) { } TEST_F(LibYUVPlanarTest, TestARGBColorTable) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Matrix for Sepia. - static const uint8 kARGBTable[256 * 4] = { + static const uint8_t kARGBTable[256 * 4] = { 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, }; @@ -685,11 +685,11 @@ TEST_F(LibYUVPlanarTest, TestARGBColorTable) { // Same as TestARGBColorTable except alpha does not change. TEST_F(LibYUVPlanarTest, TestRGBColorTable) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Matrix for Sepia. 
- static const uint8 kARGBTable[256 * 4] = { + static const uint8_t kARGBTable[256 * 4] = { 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, }; @@ -740,7 +740,7 @@ TEST_F(LibYUVPlanarTest, TestRGBColorTable) { } TEST_F(LibYUVPlanarTest, TestARGBQuantize) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -764,8 +764,8 @@ TEST_F(LibYUVPlanarTest, TestARGBQuantize) { } TEST_F(LibYUVPlanarTest, TestARGBMirror) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels[1280][4]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -787,8 +787,8 @@ TEST_F(LibYUVPlanarTest, TestARGBMirror) { } TEST_F(LibYUVPlanarTest, TestShade) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 shade_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t shade_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); orig_pixels[0][0] = 10u; @@ -845,9 +845,9 @@ TEST_F(LibYUVPlanarTest, TestShade) { } TEST_F(LibYUVPlanarTest, TestARGBInterpolate) { - SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); - SIMD_ALIGNED(uint8 orig_pixels_1[1280][4]); - SIMD_ALIGNED(uint8 interpolate_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]); memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); @@ -926,9 +926,9 @@ TEST_F(LibYUVPlanarTest, TestARGBInterpolate) { } TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { - SIMD_ALIGNED(uint8 orig_pixels_0[1280]); - SIMD_ALIGNED(uint8 orig_pixels_1[1280]); - SIMD_ALIGNED(uint8 interpolate_pixels[1280]); + SIMD_ALIGNED(uint8_t orig_pixels_0[1280]); + SIMD_ALIGNED(uint8_t orig_pixels_1[1280]); + SIMD_ALIGNED(uint8_t interpolate_pixels[1280]); memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); @@ -1192,7 +1192,6 @@ static void TestBlendPlane(int width, free_aligned_buffer_page_end(src_argb_alpha); free_aligned_buffer_page_end(dst_argb_c); free_aligned_buffer_page_end(dst_argb_opt); - return; } TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { @@ -1286,7 +1285,6 @@ static void TestI420Blend(int width, free_aligned_buffer_page_end(dst_y_opt); free_aligned_buffer_page_end(dst_u_opt); free_aligned_buffer_page_end(dst_v_opt); - return; } TEST_F(LibYUVPlanarTest, I420Blend_Opt) { @@ -1309,8 +1307,8 @@ TEST_F(LibYUVPlanarTest, I420Blend_Invert) { } TEST_F(LibYUVPlanarTest, TestAffine) { - SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); - SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]); for (int i = 0; i < 1280; ++i) { for (int j = 0; j < 4; ++j) { @@ -1327,7 +1325,7 @@ TEST_F(LibYUVPlanarTest, TestAffine) { EXPECT_EQ(191u, interpolate_pixels_C[255][3]); #if defined(HAS_ARGBAFFINEROW_SSE2) - SIMD_ALIGNED(uint8 interpolate_pixels_Opt[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]); ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0], uv_step, 1280); EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4)); @@ -1367,7 +1365,7 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { // Fill destination buffers with random data. 
for (i = 0; i < y_plane_size; ++i) { - uint8 random_number = fastrand() & 0x7f; + uint8_t random_number = fastrand() & 0x7f; dst_c[i] = random_number; dst_opt[i] = dst_c[i]; } @@ -1390,8 +1388,9 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { } for (i = 0; i < y_plane_size; ++i) { - if (dst_c[i] != dst_opt[i]) + if (dst_c[i] != dst_opt[i]) { ++err; + } } free_aligned_buffer_page_end(orig_y); @@ -1867,12 +1866,12 @@ static int TestBlur(int width, MaskCpuFlags(disable_cpu_flags); ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride, - reinterpret_cast<int32*>(dst_cumsum), width * 4, width, + reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width, invert * height, radius); MaskCpuFlags(benchmark_cpu_info); for (int i = 0; i < benchmark_iterations; ++i) { ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride, - reinterpret_cast<int32*>(dst_cumsum), width * 4, width, + reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width, invert * height, radius); } int max_diff = 0; @@ -1949,9 +1948,9 @@ TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { } TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = { @@ -2046,37 +2045,38 @@ int TestHalfFloatPlane(int benchmark_width, const int y_plane_size = benchmark_width * benchmark_height * 2; align_buffer_page_end(orig_y, y_plane_size * 3); - uint8* dst_opt = orig_y + y_plane_size; - uint8* dst_c = orig_y + y_plane_size * 2; + uint8_t* dst_opt = orig_y + y_plane_size; + uint8_t* dst_c = orig_y + y_plane_size * 2; MemRandomize(orig_y, y_plane_size); memset(dst_c, 0, y_plane_size); memset(dst_opt, 1, y_plane_size); for (i = 0; i < y_plane_size / 2; ++i) { - reinterpret_cast<uint16*>(orig_y)[i] &= mask; + reinterpret_cast<uint16_t*>(orig_y)[i] &= mask; } // Disable all optimizations. MaskCpuFlags(disable_cpu_flags); for (j = 0; j < benchmark_iterations; j++) { - HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, - reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, scale, - benchmark_width, benchmark_height); + HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2, + reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2, + scale, benchmark_width, benchmark_height); } // Enable optimizations. 
MaskCpuFlags(benchmark_cpu_info); for (j = 0; j < benchmark_iterations; j++) { - HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, - reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, + HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2, + reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2, scale, benchmark_width, benchmark_height); } int max_diff = 0; for (i = 0; i < y_plane_size / 2; ++i) { - int abs_diff = abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) - - static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i])); + int abs_diff = + abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) - + static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i])); if (abs_diff > max_diff) { max_diff = abs_diff; } @@ -2169,9 +2169,9 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) { } TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); align_buffer_page_end(lumacolortable, 32768); @@ -2343,7 +2343,7 @@ static int TestARGBRect(int width, } const int kStride = width * bpp; const int kSize = kStride * height; - const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); + const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); align_buffer_page_end(dst_argb_c, kSize + off); align_buffer_page_end(dst_argb_opt, kSize + off); @@ -2631,21 +2631,21 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2); memset(dst_pixels_uv_c, 1, kPixels * 2 * 2); - MergeUVRow_16_C(reinterpret_cast<const uint16*>(src_pixels_u), - reinterpret_cast<const uint16*>(src_pixels_v), - reinterpret_cast<uint16*>(dst_pixels_uv_c), 64, kPixels); + MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u), + reinterpret_cast<const uint16_t*>(src_pixels_v), + reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels); int has_avx2 = TestCpuFlag(kCpuHasAVX2); for (int i = 0; i < benchmark_iterations_; ++i) { if (has_avx2) { - MergeUVRow_16_AVX2(reinterpret_cast<const uint16*>(src_pixels_u), - reinterpret_cast<const uint16*>(src_pixels_v), - reinterpret_cast<uint16*>(dst_pixels_uv_opt), 64, + MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u), + reinterpret_cast<const uint16_t*>(src_pixels_v), + reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64, kPixels); } else { - MergeUVRow_16_C(reinterpret_cast<const uint16*>(src_pixels_u), - reinterpret_cast<const uint16*>(src_pixels_v), - reinterpret_cast<uint16*>(dst_pixels_uv_opt), 64, + MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u), + reinterpret_cast<const uint16_t*>(src_pixels_v), + reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64, kPixels); } } @@ -2673,18 +2673,18 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { memset(dst_pixels_y_opt, 0, kPixels * 2); memset(dst_pixels_y_c, 1, kPixels * 2); - MultiplyRow_16_C(reinterpret_cast<const uint16*>(src_pixels_y), - reinterpret_cast<uint16*>(dst_pixels_y_c), 64, kPixels); + MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + reinterpret_cast<uint16_t*>(dst_pixels_y_c), 64, kPixels); int has_avx2 = TestCpuFlag(kCpuHasAVX2); for (int i = 0; i < benchmark_iterations_; ++i) { if (has_avx2) { - MultiplyRow_16_AVX2(reinterpret_cast<const 
uint16*>(src_pixels_y), - reinterpret_cast<uint16*>(dst_pixels_y_opt), 64, + MultiplyRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y), + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64, kPixels); } else { - MultiplyRow_16_C(reinterpret_cast<const uint16*>(src_pixels_y), - reinterpret_cast<uint16*>(dst_pixels_y_opt), 64, + MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64, kPixels); } } @@ -2710,13 +2710,13 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) { memset(dst_pixels_y_c, 1, kPixels); MaskCpuFlags(disable_cpu_flags_); - Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y), + Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y), benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384, benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { - Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y), + Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y), benchmark_width_, dst_pixels_y_opt, benchmark_width_, 16384, benchmark_width_, benchmark_height_); } @@ -2742,26 +2742,26 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { MemRandomize(src_pixels_y, kPixels * 2); // clamp source range to 10 bits. for (int i = 0; i < kPixels; ++i) { - reinterpret_cast<uint16*>(src_pixels_y)[i] &= 1023; + reinterpret_cast<uint16_t*>(src_pixels_y)[i] &= 1023; } memset(dst_pixels_y_opt, 0, kPixels); memset(dst_pixels_y_c, 1, kPixels); - Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y), + Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y), dst_pixels_y_c, 16384, kPixels); int has_avx2 = TestCpuFlag(kCpuHasAVX2); int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); for (int i = 0; i < benchmark_iterations_; ++i) { if (has_avx2) { - Convert16To8Row_AVX2(reinterpret_cast<const uint16*>(src_pixels_y), + Convert16To8Row_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y), dst_pixels_y_opt, 16384, kPixels); } else if (has_ssse3) { - Convert16To8Row_SSSE3(reinterpret_cast<const uint16*>(src_pixels_y), + Convert16To8Row_SSSE3(reinterpret_cast<const uint16_t*>(src_pixels_y), dst_pixels_y_opt, 16384, kPixels); } else { - Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y), + Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y), dst_pixels_y_opt, 16384, kPixels); } } @@ -2788,13 +2788,14 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) { MaskCpuFlags(disable_cpu_flags_); Convert8To16Plane(src_pixels_y, benchmark_width_, - reinterpret_cast<uint16*>(dst_pixels_y_c), benchmark_width_, - 1024, benchmark_width_, benchmark_height_); + reinterpret_cast<uint16_t*>(dst_pixels_y_c), + benchmark_width_, 1024, benchmark_width_, + benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { Convert8To16Plane(src_pixels_y, benchmark_width_, - reinterpret_cast<uint16*>(dst_pixels_y_opt), + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), benchmark_width_, 1024, benchmark_width_, benchmark_height_); } @@ -2820,7 +2821,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) { memset(dst_pixels_y_opt, 0, kPixels * 2); memset(dst_pixels_y_c, 1, kPixels * 2); - Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16*>(dst_pixels_y_c), + Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16_t*>(dst_pixels_y_c), 1024, kPixels); int has_avx2 = TestCpuFlag(kCpuHasAVX2); @@ -2828,15 +2829,15 @@ TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) { for (int i = 0; i < 
benchmark_iterations_; ++i) { if (has_avx2) { Convert8To16Row_AVX2(src_pixels_y, - reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, kPixels); } else if (has_sse2) { Convert8To16Row_SSE2(src_pixels_y, - reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, kPixels); } else { Convert8To16Row_C(src_pixels_y, - reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, kPixels); } } @@ -2861,8 +2862,8 @@ float TestScaleMaxSamples(int benchmark_width, // NEON does multiple of 8, so round count up const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48); - uint8* dst_c = orig_y + kPixels * 4 + 16; - uint8* dst_opt = orig_y + kPixels * 4 * 2 + 32; + uint8_t* dst_c = orig_y + kPixels * 4 + 16; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32; // Randomize works but may contain some denormals affecting performance. // MemRandomize(orig_y, kPixels * 4); @@ -2929,8 +2930,8 @@ float TestScaleSumSamples(int benchmark_width, // NEON does multiple of 8, so round count up const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; align_buffer_page_end(orig_y, kPixels * 4 * 3); - uint8* dst_c = orig_y + kPixels * 4; - uint8* dst_opt = orig_y + kPixels * 4 * 2; + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; // Randomize works but may contain some denormals affecting performance. // MemRandomize(orig_y, kPixels * 4); @@ -3007,8 +3008,8 @@ float TestScaleSamples(int benchmark_width, // NEON does multiple of 8, so round count up const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; align_buffer_page_end(orig_y, kPixels * 4 * 3); - uint8* dst_c = orig_y + kPixels * 4; - uint8* dst_opt = orig_y + kPixels * 4 * 2; + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; // Randomize works but may contain some denormals affecting performance. // MemRandomize(orig_y, kPixels * 4); @@ -3070,8 +3071,8 @@ float TestCopySamples(int benchmark_width, // NEON does multiple of 16 floats, so round count up const int kPixels = (benchmark_width * benchmark_height + 15) & ~15; align_buffer_page_end(orig_y, kPixels * 4 * 3); - uint8* dst_c = orig_y + kPixels * 4; - uint8* dst_opt = orig_y + kPixels * 4 * 2; + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; // Randomize works but may contain some denormals affecting performance. 
// MemRandomize(orig_y, kPixels * 4); @@ -3122,13 +3123,13 @@ TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) { EXPECT_EQ(0, diff); } -extern "C" void GaussRow_NEON(const uint32* src, uint16* dst, int width); -extern "C" void GaussRow_C(const uint32* src, uint16* dst, int width); +extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width); +extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width); TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { - SIMD_ALIGNED(uint32 orig_pixels[640 + 4]); - SIMD_ALIGNED(uint16 dst_pixels_c[640]); - SIMD_ALIGNED(uint16 dst_pixels_opt[640]); + SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]); + SIMD_ALIGNED(uint16_t dst_pixels_c[640]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[640]); memset(orig_pixels, 0, sizeof(orig_pixels)); memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); @@ -3156,30 +3157,30 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { } EXPECT_EQ(dst_pixels_c[0], - static_cast<uint16>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1)); - EXPECT_EQ(dst_pixels_c[639], static_cast<uint16>(10256)); + static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1)); + EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256)); } -extern "C" void GaussCol_NEON(const uint16* src0, - const uint16* src1, - const uint16* src2, - const uint16* src3, - const uint16* src4, - uint32* dst, +extern "C" void GaussCol_NEON(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, int width); -extern "C" void GaussCol_C(const uint16* src0, - const uint16* src1, - const uint16* src2, - const uint16* src3, - const uint16* src4, - uint32* dst, +extern "C" void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, int width); TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { - SIMD_ALIGNED(uint16 orig_pixels[640 * 5]); - SIMD_ALIGNED(uint32 dst_pixels_c[640]); - SIMD_ALIGNED(uint32 dst_pixels_opt[640]); + SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]); + SIMD_ALIGNED(uint32_t dst_pixels_c[640]); + SIMD_ALIGNED(uint32_t dst_pixels_opt[640]); memset(orig_pixels, 0, sizeof(orig_pixels)); memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); @@ -3214,9 +3215,10 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } - EXPECT_EQ(dst_pixels_c[0], static_cast<uint32>(0 * 1 + 640 * 4 + 640 * 2 * 6 + - 640 * 3 * 4 + 640 * 4 * 1)); - EXPECT_EQ(dst_pixels_c[639], static_cast<uint32>(30704)); + EXPECT_EQ(dst_pixels_c[0], + static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 + + 640 * 4 * 1)); + EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704)); } } // namespace libyuv diff --git a/chromium/third_party/libyuv/unit_test/scale_argb_test.cc b/chromium/third_party/libyuv/unit_test/scale_argb_test.cc index d11aec20476..a1be85b8d8a 100644 --- a/chromium/third_party/libyuv/unit_test/scale_argb_test.cc +++ b/chromium/third_party/libyuv/unit_test/scale_argb_test.cc @@ -37,7 +37,7 @@ static int ARGBTestFilter(int src_width, int i, j; const int b = 0; // 128 to test for padding/stride. 
- int64 src_argb_plane_size = + int64_t src_argb_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL; int src_stride_argb = (b * 2 + Abs(src_width)) * 4; @@ -48,7 +48,8 @@ static int ARGBTestFilter(int src_width, } MemRandomize(src_argb, src_argb_plane_size); - int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL; + int64_t dst_argb_plane_size = + (dst_width + b * 2) * (dst_height + b * 2) * 4LL; int dst_stride_argb = (b * 2 + dst_width) * 4; align_buffer_page_end(dst_argb_c, dst_argb_plane_size); @@ -116,11 +117,11 @@ static int ARGBTestFilter(int src_width, static const int kTileX = 8; static const int kTileY = 8; -static int TileARGBScale(const uint8* src_argb, +static int TileARGBScale(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, @@ -157,7 +158,7 @@ static int ARGBClipTestFilter(int src_width, } const int b = 128; - int64 src_argb_plane_size = + int64_t src_argb_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4; int src_stride_argb = (b * 2 + Abs(src_width)) * 4; @@ -168,7 +169,7 @@ static int ARGBClipTestFilter(int src_width, } memset(src_argb, 1, src_argb_plane_size); - int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; + int64_t dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; int dst_stride_argb = (b * 2 + dst_width) * 4; int i, j; @@ -310,19 +311,20 @@ TEST_SCALETO(ARGBScale, 1280, 720) #undef TEST_SCALETO // Scale with YUV conversion to ARGB and clipping. +// TODO(fbarchard): Add fourcc support. All 4 ARGB formats is easy to support. LIBYUV_API -int YUVToARGBScaleReference2(const uint8* src_y, +int YUVToARGBScaleReference2(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint32 /* src_fourcc */, // TODO: Add support. + uint32 /* src_fourcc */, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - uint32 /* dst_fourcc */, // TODO: Add support. 
+ uint32 /* dst_fourcc */, int dst_width, int dst_height, int clip_x, @@ -330,7 +332,8 @@ int YUVToARGBScaleReference2(const uint8* src_y, int clip_width, int clip_height, enum FilterMode filtering) { - uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4)); + uint8_t* argb_buffer = + static_cast<uint8_t*>(malloc(src_width * src_height * 4)); int r; I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, argb_buffer, src_width * 4, src_width, src_height); @@ -342,7 +345,12 @@ int YUVToARGBScaleReference2(const uint8* src_y, return r; } -static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) { +static void FillRamp(uint8_t* buf, + int width, + int height, + int v, + int dx, + int dy) { int rv = v; for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { @@ -369,8 +377,8 @@ static int YUVToARGBTestFilter(int src_width, int dst_height, FilterMode f, int benchmark_iterations) { - int64 src_y_plane_size = Abs(src_width) * Abs(src_height); - int64 src_uv_plane_size = + int64_t src_y_plane_size = Abs(src_width) * Abs(src_height); + int64_t src_uv_plane_size = ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2); int src_stride_y = Abs(src_width); int src_stride_uv = (Abs(src_width) + 1) / 2; @@ -379,7 +387,7 @@ static int YUVToARGBTestFilter(int src_width, align_buffer_page_end(src_u, src_uv_plane_size); align_buffer_page_end(src_v, src_uv_plane_size); - int64 dst_argb_plane_size = (dst_width) * (dst_height)*4LL; + int64_t dst_argb_plane_size = (dst_width) * (dst_height)*4LL; int dst_stride_argb = (dst_width)*4; align_buffer_page_end(dst_argb_c, dst_argb_plane_size); align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); diff --git a/chromium/third_party/libyuv/unit_test/scale_test.cc b/chromium/third_party/libyuv/unit_test/scale_test.cc index c39211a161b..08b6cffaa26 100644 --- a/chromium/third_party/libyuv/unit_test/scale_test.cc +++ b/chromium/third_party/libyuv/unit_test/scale_test.cc @@ -38,8 +38,8 @@ static int TestFilter(int src_width, int src_width_uv = (Abs(src_width) + 1) >> 1; int src_height_uv = (Abs(src_height) + 1) >> 1; - int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); - int64 src_uv_plane_size = (src_width_uv) * (src_height_uv); + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); int src_stride_y = Abs(src_width); int src_stride_uv = src_width_uv; @@ -58,8 +58,8 @@ static int TestFilter(int src_width, int dst_width_uv = (dst_width + 1) >> 1; int dst_height_uv = (dst_height + 1) >> 1; - int64 dst_y_plane_size = (dst_width) * (dst_height); - int64 dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); int dst_stride_y = dst_width; int dst_stride_uv = dst_width_uv; @@ -157,8 +157,8 @@ static int TestFilter_16(int src_width, int src_width_uv = (Abs(src_width) + 1) >> 1; int src_height_uv = (Abs(src_height) + 1) >> 1; - int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); - int64 src_uv_plane_size = (src_width_uv) * (src_height_uv); + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); int src_stride_y = Abs(src_width); int src_stride_uv = src_width_uv; @@ -173,9 +173,9 @@ static int TestFilter_16(int src_width, printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); return 0; } - uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16); - uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16); - uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16); + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16); + uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16); MemRandomize(src_y, src_y_plane_size); MemRandomize(src_u, src_uv_plane_size); @@ -205,9 +205,9 @@ static int TestFilter_16(int src_width, align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2); align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2); - uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16); - uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16); - uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16); + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16); + uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16); MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, @@ -345,9 +345,9 @@ TEST_SCALETO(Scale, 1280, 720) #ifdef HAS_SCALEROWDOWN2_SSSE3 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { - SIMD_ALIGNED(uint8 orig_pixels[128 * 2]); - SIMD_ALIGNED(uint8 dst_pixels_opt[64]); - SIMD_ALIGNED(uint8 dst_pixels_c[64]); + SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); + SIMD_ALIGNED(uint8_t dst_pixels_c[64]); memset(orig_pixels, 0, sizeof(orig_pixels)); memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); @@ -433,19 +433,19 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { } #endif // HAS_SCALEROWDOWN2_SSSE3 -extern "C" void ScaleRowUp2_16_NEON(const uint16* src_ptr, +extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); -extern "C" void ScaleRowUp2_16_C(const uint16* src_ptr, +extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { - SIMD_ALIGNED(uint16 orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun. - SIMD_ALIGNED(uint16 dst_pixels_opt[1280]); - SIMD_ALIGNED(uint16 dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun. 
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); memset(orig_pixels, 0, sizeof(orig_pixels)); memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt)); @@ -475,15 +475,15 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { EXPECT_EQ(dst_pixels_c[1279], 800); } -extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, +extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst, + uint16_t* dst, int dst_width); TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { - SIMD_ALIGNED(uint16 orig_pixels[2560 * 2]); - SIMD_ALIGNED(uint16 dst_pixels_c[1280]); - SIMD_ALIGNED(uint16 dst_pixels_opt[1280]); + SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); memset(orig_pixels, 0, sizeof(orig_pixels)); memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); @@ -530,7 +530,7 @@ static int TestPlaneFilter_16(int src_width, } int i; - int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); int src_stride_y = Abs(src_width); int dst_y_plane_size = dst_width * dst_height; int dst_stride_y = dst_width; @@ -539,8 +539,8 @@ static int TestPlaneFilter_16(int src_width, align_buffer_page_end(src_y_16, src_y_plane_size * 2); align_buffer_page_end(dst_y_8, dst_y_plane_size); align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); - uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16); - uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16); + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); MemRandomize(src_y, src_y_plane_size); memset(dst_y_8, 0, dst_y_plane_size); diff --git a/chromium/third_party/libyuv/unit_test/unit_test.cc b/chromium/third_party/libyuv/unit_test/unit_test.cc index c2d7a1db7b5..20aadb44e2f 100644 --- a/chromium/third_party/libyuv/unit_test/unit_test.cc +++ b/chromium/third_party/libyuv/unit_test/unit_test.cc @@ -31,11 +31,11 @@ DEFINE_int32(libyuv_cpu_info, "cpu flags for benchmark code. 1 = C, -1 = SIMD"); #else // Disable command line parameters if gflags disabled. -static const int32 FLAGS_libyuv_width = 0; -static const int32 FLAGS_libyuv_height = 0; -static const int32 FLAGS_libyuv_repeat = 0; -static const int32 FLAGS_libyuv_flags = 0; -static const int32 FLAGS_libyuv_cpu_info = 0; +static const int32_t FLAGS_libyuv_width = 0; +static const int32_t FLAGS_libyuv_height = 0; +static const int32_t FLAGS_libyuv_repeat = 0; +static const int32_t FLAGS_libyuv_flags = 0; +static const int32_t FLAGS_libyuv_cpu_info = 0; #endif // For quicker unittests, default is 128 x 72. 
But when benchmarking, diff --git a/chromium/third_party/libyuv/unit_test/unit_test.h b/chromium/third_party/libyuv/unit_test/unit_test.h index 6454389d52d..dee3952fdc8 100644 --- a/chromium/third_party/libyuv/unit_test/unit_test.h +++ b/chromium/third_party/libyuv/unit_test/unit_test.h @@ -69,10 +69,10 @@ static inline bool SizeValid(int src_width, return true; } -#define align_buffer_page_end(var, size) \ - uint8* var##_mem = \ - reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \ - uint8* var = reinterpret_cast<uint8*>( \ +#define align_buffer_page_end(var, size) \ + uint8_t* var##_mem = \ + reinterpret_cast<uint8_t*>(malloc(((size) + 4095 + 63) & ~4095)); \ + uint8_t* var = reinterpret_cast<uint8_t*>( \ (intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - (size)) & ~63) #define free_aligned_buffer_page_end(var) \ @@ -111,10 +111,10 @@ inline int fastrand() { return static_cast<int>((fastrand_seed >> 16) & 0xffff); } -static inline void MemRandomize(uint8* dst, int64 len) { - int64 i; +static inline void MemRandomize(uint8_t* dst, int64_t len) { + int64_t i; for (i = 0; i < len - 1; i += 2) { - *reinterpret_cast<uint16*>(dst) = fastrand(); + *reinterpret_cast<uint16_t*>(dst) = fastrand(); dst += 2; } for (; i < len; ++i) { @@ -129,7 +129,6 @@ class LibYUVColorTest : public ::testing::Test { int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -142,7 +141,6 @@ class LibYUVConvertTest : public ::testing::Test { int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -155,7 +153,6 @@ class LibYUVScaleTest : public ::testing::Test { int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -168,7 +165,6 @@ class LibYUVRotateTest : public ::testing::Test { int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -181,7 +177,6 @@ class LibYUVPlanarTest : public ::testing::Test { int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. 
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -194,7 +189,6 @@ class LibYUVBaseTest : public ::testing::Test { int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -207,7 +201,6 @@ class LibYUVCompareTest : public ::testing::Test { int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. diff --git a/chromium/third_party/libyuv/unit_test/video_common_test.cc b/chromium/third_party/libyuv/unit_test/video_common_test.cc index ba7b15a9d28..4d89586e76f 100644 --- a/chromium/third_party/libyuv/unit_test/video_common_test.cc +++ b/chromium/third_party/libyuv/unit_test/video_common_test.cc @@ -18,15 +18,12 @@ namespace libyuv { // Tests FourCC codes in video common, which are used for ConvertToI420(). 
-static bool TestValidChar(uint32 onecc) { - if ((onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') || - (onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff)) { - return true; - } - return false; +static bool TestValidChar(uint32_t onecc) { + return (onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') || + (onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff); } -static bool TestValidFourCC(uint32 fourcc, int bpp) { +static bool TestValidFourCC(uint32_t fourcc, int bpp) { if (!TestValidChar(fourcc & 0xff) || !TestValidChar((fourcc >> 8) & 0xff) || !TestValidChar((fourcc >> 16) & 0xff) || !TestValidChar((fourcc >> 24) & 0xff)) { @@ -39,23 +36,23 @@ static bool TestValidFourCC(uint32 fourcc, int bpp) { } TEST_F(LibYUVBaseTest, TestCanonicalFourCC) { - EXPECT_EQ(static_cast<uint32>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV)); - EXPECT_EQ(static_cast<uint32>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12)); - EXPECT_EQ(static_cast<uint32>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16)); - EXPECT_EQ(static_cast<uint32>(FOURCC_I444), CanonicalFourCC(FOURCC_YU24)); - EXPECT_EQ(static_cast<uint32>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV)); - EXPECT_EQ(static_cast<uint32>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS)); - EXPECT_EQ(static_cast<uint32>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC)); - EXPECT_EQ(static_cast<uint32>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY)); - EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG)); - EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3)); - EXPECT_EQ(static_cast<uint32>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3)); - EXPECT_EQ(static_cast<uint32>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I444), CanonicalFourCC(FOURCC_YU24)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551)); } TEST_F(LibYUVBaseTest, TestFourCC) { @@ -74,6 +71,7 @@ TEST_F(LibYUVBaseTest, TestFourCC) { 
EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA)); EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR)); EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30)); EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG)); EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA)); diff --git a/chromium/third_party/libyuv/util/compare.cc b/chromium/third_party/libyuv/util/compare.cc index ef0beefafee..a16613ee2f9 100644 --- a/chromium/third_party/libyuv/util/compare.cc +++ b/chromium/third_party/libyuv/util/compare.cc @@ -29,22 +29,24 @@ int main(int argc, char** argv) { FILE* fin2 = name2 ? fopen(name2, "rb") : NULL; const int kBlockSize = 32768; - uint8 buf1[kBlockSize]; - uint8 buf2[kBlockSize]; - uint32 hash1 = 5381; - uint32 hash2 = 5381; - uint64 sum_square_err = 0; - uint64 size_min = 0; + uint8_t buf1[kBlockSize]; + uint8_t buf2[kBlockSize]; + uint32_t hash1 = 5381; + uint32_t hash2 = 5381; + uint64_t sum_square_err = 0; + uint64_t size_min = 0; int amt1 = 0; int amt2 = 0; do { amt1 = static_cast<int>(fread(buf1, 1, kBlockSize, fin1)); - if (amt1 > 0) + if (amt1 > 0) { hash1 = libyuv::HashDjb2(buf1, amt1, hash1); + } if (fin2) { amt2 = static_cast<int>(fread(buf2, 1, kBlockSize, fin2)); - if (amt2 > 0) + if (amt2 > 0) { hash2 = libyuv::HashDjb2(buf2, amt2, hash2); + } int amt_min = (amt1 < amt2) ? amt1 : amt2; size_min += amt_min; sum_square_err += libyuv::ComputeSumSquareError(buf1, buf2, amt_min); diff --git a/chromium/third_party/libyuv/util/psnr.cc b/chromium/third_party/libyuv/util/psnr.cc index 27f876c0b4a..f54015bab82 100644 --- a/chromium/third_party/libyuv/util/psnr.cc +++ b/chromium/third_party/libyuv/util/psnr.cc @@ -21,14 +21,14 @@ extern "C" { #endif -typedef unsigned int uint32; // NOLINT +typedef unsigned int uint32_t; // NOLINT #ifdef _MSC_VER -typedef unsigned __int64 uint64; +typedef unsigned __int64 uint64_t; #else // COMPILER_MSVC #if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long uint64; // NOLINT +typedef unsigned long uint64_t; // NOLINT #else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long long uint64; // NOLINT +typedef unsigned long long uint64_t; // NOLINT #endif // __LP64__ #endif // _MSC_VER @@ -38,10 +38,10 @@ typedef unsigned long long uint64; // NOLINT #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) #define HAS_SUMSQUAREERROR_NEON -static uint32 SumSquareError_NEON(const uint8* src_a, - const uint8* src_b, - int count) { - volatile uint32 sse; +static uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + volatile uint32_t sse; asm volatile( "vmov.u8 q7, #0 \n" "vmov.u8 q9, #0 \n" @@ -73,10 +73,10 @@ static uint32 SumSquareError_NEON(const uint8* src_a, } #elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #define HAS_SUMSQUAREERROR_NEON -static uint32 SumSquareError_NEON(const uint8* src_a, - const uint8* src_b, - int count) { - volatile uint32 sse; +static uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + volatile uint32_t sse; asm volatile( "eor v16.16b, v16.16b, v16.16b \n" "eor v18.16b, v18.16b, v18.16b \n" @@ -107,9 +107,9 @@ static uint32 SumSquareError_NEON(const uint8* src_a, } #elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #define HAS_SUMSQUAREERROR_SSE2 
-__declspec(naked) static uint32 SumSquareError_SSE2(const uint8* /*src_a*/, - const uint8* /*src_b*/, - int /*count*/) { +__declspec(naked) static uint32_t SumSquareError_SSE2(const uint8_t* /*src_a*/, + const uint8_t* /*src_b*/, + int /*count*/) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b @@ -146,10 +146,10 @@ __declspec(naked) static uint32 SumSquareError_SSE2(const uint8* /*src_a*/, } #elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) #define HAS_SUMSQUAREERROR_SSE2 -static uint32 SumSquareError_SSE2(const uint8* src_a, - const uint8* src_b, - int count) { - uint32 sse; +static uint32_t SumSquareError_SSE2(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse; asm volatile( // NOLINT "pxor %%xmm0,%%xmm0 \n" "pxor %%xmm5,%%xmm5 \n" @@ -228,22 +228,22 @@ static int CpuHasSSE2() { } #endif // HAS_SUMSQUAREERROR_SSE2 -static uint32 SumSquareError_C(const uint8* src_a, - const uint8* src_b, - int count) { - uint32 sse = 0u; +static uint32_t SumSquareError_C(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse = 0u; for (int x = 0; x < count; ++x) { int diff = src_a[x] - src_b[x]; - sse += static_cast<uint32>(diff * diff); + sse += static_cast<uint32_t>(diff * diff); } return sse; } -double ComputeSumSquareError(const uint8* src_a, - const uint8* src_b, +double ComputeSumSquareError(const uint8_t* src_a, + const uint8_t* src_b, int count) { - uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = - SumSquareError_C; + uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b, + int count) = SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) SumSquareError = SumSquareError_NEON; #endif @@ -253,7 +253,7 @@ double ComputeSumSquareError(const uint8* src_a, } #endif const int kBlockSize = 1 << 15; - uint64 sse = 0; + uint64_t sse = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : sse) #endif @@ -280,8 +280,9 @@ double ComputeSumSquareError(const uint8* src_a, // Returns 128.0 (kMaxPSNR) if sse is 0 (perfect match). double ComputePSNR(double sse, double size) { const double kMINSSE = 255.0 * 255.0 * size / pow(10.0, kMaxPSNR / 10.0); - if (sse <= kMINSSE) + if (sse <= kMINSSE) { sse = kMINSSE; // Produces max PSNR of 128 + } return 10.0 * log10(255.0 * 255.0 * size / sse); } diff --git a/chromium/third_party/libyuv/util/psnr.h b/chromium/third_party/libyuv/util/psnr.h index 0816b976001..aac128cbca8 100644 --- a/chromium/third_party/libyuv/util/psnr.h +++ b/chromium/third_party/libyuv/util/psnr.h @@ -20,7 +20,7 @@ extern "C" { #endif #if !defined(INT_TYPES_DEFINED) && !defined(UINT8_TYPE_DEFINED) -typedef unsigned char uint8; +typedef unsigned char uint8_t; #define UINT8_TYPE_DEFINED #endif @@ -31,7 +31,9 @@ static const double kMaxPSNR = 128.0; #if !defined(HAVE_JPEG) // Computer Sum of Squared Error (SSE). // Pass this to ComputePSNR for final result. 
-double ComputeSumSquareError(const uint8* org, const uint8* rec, int size); +double ComputeSumSquareError(const uint8_t* src_a, + const uint8_t* src_b, + int count); #endif // PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse) diff --git a/chromium/third_party/libyuv/util/psnr_main.cc b/chromium/third_party/libyuv/util/psnr_main.cc index 4d930be4aed..a930b202ecf 100644 --- a/chromium/third_party/libyuv/util/psnr_main.cc +++ b/chromium/third_party/libyuv/util/psnr_main.cc @@ -90,9 +90,9 @@ bool ExtractResolutionFromFilename(const char* name, fseek(file_org, 0, SEEK_END); size_t total_size = ftell(file_org); fseek(file_org, 0, SEEK_SET); - uint8* const ch_org = new uint8[total_size]; + uint8_t* const ch_org = new uint8_t[total_size]; memset(ch_org, 0, total_size); - size_t bytes_org = fread(ch_org, sizeof(uint8), total_size, file_org); + size_t bytes_org = fread(ch_org, sizeof(uint8_t), total_size, file_org); fclose(file_org); if (bytes_org == total_size) { if (0 == libyuv::MJPGSize(ch_org, total_size, width_ptr, height_ptr)) { @@ -107,13 +107,15 @@ bool ExtractResolutionFromFilename(const char* name, // Scale Y channel from 16..240 to 0..255. // This can be useful when comparing codecs that are inconsistant about Y -uint8 ScaleY(uint8 y) { +uint8_t ScaleY(uint8_t y) { int ny = (y - 16) * 256 / 224; - if (ny < 0) + if (ny < 0) { ny = 0; - if (ny > 255) + } + if (ny > 255) { ny = 255; - return static_cast<uint8>(ny); + } + return static_cast<uint8_t>(ny); } // MSE = Mean Square Error @@ -150,8 +152,9 @@ void PrintHelp(const char* program) { } void ParseOptions(int argc, const char* argv[]) { - if (argc <= 1) + if (argc <= 1) { PrintHelp(argv[0]); + } for (int c = 1; c < argc; ++c) { if (!strcmp(argv[c], "-v")) { verbose = true; @@ -237,8 +240,8 @@ void ParseOptions(int argc, const char* argv[]) { } } -bool UpdateMetrics(uint8* ch_org, - uint8* ch_rec, +bool UpdateMetrics(uint8_t* ch_org, + uint8_t* ch_rec, const int y_size, const int uv_size, const size_t total_size, @@ -247,10 +250,10 @@ bool UpdateMetrics(uint8* ch_org, metric* distorted_frame, bool do_psnr) { const int uv_offset = (do_swap_uv ? 
uv_size : 0); - const uint8* const u_org = ch_org + y_size + uv_offset; - const uint8* const u_rec = ch_rec + y_size; - const uint8* const v_org = ch_org + y_size + (uv_size - uv_offset); - const uint8* const v_rec = ch_rec + y_size + uv_size; + const uint8_t* const u_org = ch_org + y_size + uv_offset; + const uint8_t* const u_rec = ch_rec + y_size; + const uint8_t* const v_org = ch_org + y_size + (uv_size - uv_offset); + const uint8_t* const v_rec = ch_rec + y_size + uv_size; if (do_psnr) { #ifdef HAVE_JPEG double y_err = static_cast<double>( @@ -301,12 +304,15 @@ bool UpdateMetrics(uint8* ch_org, cur_distortion_psnr->all += distorted_frame->all; bool ismin = false; - if (distorted_frame->y < cur_distortion_psnr->min_y) + if (distorted_frame->y < cur_distortion_psnr->min_y) { cur_distortion_psnr->min_y = distorted_frame->y; - if (distorted_frame->u < cur_distortion_psnr->min_u) + } + if (distorted_frame->u < cur_distortion_psnr->min_u) { cur_distortion_psnr->min_u = distorted_frame->u; - if (distorted_frame->v < cur_distortion_psnr->min_v) + } + if (distorted_frame->v < cur_distortion_psnr->min_v) { cur_distortion_psnr->min_v = distorted_frame->v; + } if (distorted_frame->all < cur_distortion_psnr->min_all) { cur_distortion_psnr->min_all = distorted_frame->all; cur_distortion_psnr->min_frame = number_of_frames; @@ -374,8 +380,8 @@ int main(int argc, const char* argv[]) { #endif } - uint8* const ch_org = new uint8[total_size]; - uint8* const ch_rec = new uint8[total_size]; + uint8_t* const ch_org = new uint8_t[total_size]; + uint8_t* const ch_rec = new uint8_t[total_size]; if (ch_org == NULL || ch_rec == NULL) { fprintf(stderr, "No memory available\n"); fclose(file_org); @@ -429,14 +435,15 @@ int main(int argc, const char* argv[]) { int number_of_frames; for (number_of_frames = 0;; ++number_of_frames) { - if (num_frames && number_of_frames >= num_frames) + if (num_frames && number_of_frames >= num_frames) { break; + } - size_t bytes_org = fread(ch_org, sizeof(uint8), total_size, file_org); + size_t bytes_org = fread(ch_org, sizeof(uint8_t), total_size, file_org); if (bytes_org < total_size) { #ifdef HAVE_JPEG // Try parsing file as a jpeg. - uint8* const ch_jpeg = new uint8[bytes_org]; + uint8_t* const ch_jpeg = new uint8_t[bytes_org]; memcpy(ch_jpeg, ch_org, bytes_org); memset(ch_org, 0, total_size); @@ -456,11 +463,11 @@ int main(int argc, const char* argv[]) { for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { size_t bytes_rec = - fread(ch_rec, sizeof(uint8), total_size, file_rec[cur_rec]); + fread(ch_rec, sizeof(uint8_t), total_size, file_rec[cur_rec]); if (bytes_rec < total_size) { #ifdef HAVE_JPEG // Try parsing file as a jpeg. 
- uint8* const ch_jpeg = new uint8[bytes_rec]; + uint8_t* const ch_jpeg = new uint8_t[bytes_rec]; memcpy(ch_jpeg, ch_rec, bytes_rec); memset(ch_rec, 0, total_size); @@ -482,7 +489,7 @@ int main(int argc, const char* argv[]) { printf("%5d", number_of_frames); } if (do_psnr) { - metric distorted_frame; + metric distorted_frame = {}; metric* cur_distortion_psnr = &distortion_psnr[cur_rec]; bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size, number_of_frames, cur_distortion_psnr, @@ -496,7 +503,7 @@ int main(int argc, const char* argv[]) { } } if (do_ssim) { - metric distorted_frame; + metric distorted_frame = {}; metric* cur_distortion_ssim = &distortion_ssim[cur_rec]; bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size, number_of_frames, cur_distortion_ssim, diff --git a/chromium/third_party/libyuv/util/ssim.cc b/chromium/third_party/libyuv/util/ssim.cc index 43e725d8210..096fbcf0610 100644 --- a/chromium/third_party/libyuv/util/ssim.cc +++ b/chromium/third_party/libyuv/util/ssim.cc @@ -16,8 +16,8 @@ extern "C" { #endif -typedef unsigned int uint32; // NOLINT -typedef unsigned short uint16; // NOLINT +typedef unsigned int uint32_t; // NOLINT +typedef unsigned short uint16_t; // NOLINT #if !defined(LIBYUV_DISABLE_X86) && !defined(__SSE2__) && \ (defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))) @@ -50,7 +50,7 @@ static const double kiW[KERNEL + 1 + 1] = { #if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__) -#define PWEIGHT(A, B) static_cast<uint16>(K[(A)] * K[(B)]) // weight product +#define PWEIGHT(A, B) static_cast<uint16_t>(K[(A)] * K[(B)]) // weight product #define MAKE_WEIGHT(L) \ { \ { \ @@ -66,7 +66,7 @@ static const double kiW[KERNEL + 1 + 1] = { // values. We can't call _mm_set_epi16() for static compile-time initialization. static const struct { union { - uint16 i16_[8]; + uint16_t i16_[8]; __m128i m_; } values_; } W0 = MAKE_WEIGHT(0), W1 = MAKE_WEIGHT(1), W2 = MAKE_WEIGHT(2), @@ -88,10 +88,12 @@ static double FinalizeSSIM(double iw, double sxx = xxm * iw - iwx * iwx; double syy = yym * iw - iwy * iwy; // small errors are possible, due to rounding. Clamp to zero. - if (sxx < 0.) + if (sxx < 0.) { sxx = 0.; - if (syy < 0.) + } + if (syy < 0.) { syy = 0.; + } const double sxsy = sqrt(sxx * syy); const double sxy = xym * iw - iwx * iwy; static const double C11 = (0.01 * 0.01) * (255 * 255); @@ -109,21 +111,22 @@ static double FinalizeSSIM(double iw, // Note: worst case of accumulation is a weight of 33 = 11 + 2 * (7 + 3 + 1) // with a diff of 255, squared. The maximum error is thus 0x4388241, // which fits into 32 bits integers. -double GetSSIM(const uint8* org, - const uint8* rec, +double GetSSIM(const uint8_t* org, + const uint8_t* rec, int xo, int yo, int W, int H, int stride) { - uint32 ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; + uint32_t ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; org += (yo - KERNEL) * stride; org += (xo - KERNEL); rec += (yo - KERNEL) * stride; rec += (xo - KERNEL); for (int y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride, rec += stride) { - if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) + if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) { continue; + } const int Wy = K[y_]; for (int x_ = 0; x_ < KERNEL_SIZE; ++x_) { const int Wxy = Wy * K[x_]; @@ -142,13 +145,13 @@ double GetSSIM(const uint8* org, return FinalizeSSIM(1. 
/ ws, xm, ym, xxm, xym, yym); } -double GetSSIMFullKernel(const uint8* org, - const uint8* rec, +double GetSSIMFullKernel(const uint8_t* org, + const uint8_t* rec, int xo, int yo, int stride, double area_weight) { - uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; + uint32_t xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; #if defined(LIBYUV_DISABLE_X86) || !defined(__SSE2__) @@ -262,7 +265,7 @@ double GetSSIMFullKernel(const uint8* org, #define ADD_AND_STORE_FOUR_EPI32(M, OUT) \ do { \ - uint32 tmp[4]; \ + uint32_t tmp[4]; \ _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \ (OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \ } while (0) @@ -292,8 +295,8 @@ static int start_max(int x, int y) { return (x > y) ? x : y; } -double CalcSSIM(const uint8* org, - const uint8* rec, +double CalcSSIM(const uint8_t* org, + const uint8_t* rec, const int image_width, const int image_height) { double SSIM = 0.; @@ -328,8 +331,8 @@ double CalcSSIM(const uint8* org, // NOTE: we could use similar method for the left-most pixels too. const int kScratchWidth = 8; const int kScratchStride = kScratchWidth + KERNEL + 1; - uint8 scratch_org[KERNEL_SIZE * kScratchStride] = {0}; - uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = {0}; + uint8_t scratch_org[KERNEL_SIZE * kScratchStride] = {0}; + uint8_t scratch_rec[KERNEL_SIZE * kScratchStride] = {0}; for (int k = 0; k < KERNEL_SIZE; ++k) { const int offset = diff --git a/chromium/third_party/libyuv/util/ssim.h b/chromium/third_party/libyuv/util/ssim.h index 4647f45de14..a855f1d1233 100644 --- a/chromium/third_party/libyuv/util/ssim.h +++ b/chromium/third_party/libyuv/util/ssim.h @@ -20,12 +20,12 @@ extern "C" { #endif #if !defined(INT_TYPES_DEFINED) && !defined(UINT8_TYPE_DEFINED) -typedef unsigned char uint8; +typedef unsigned char uint8_t; #define UINT8_TYPE_DEFINED #endif -double CalcSSIM(const uint8* org, - const uint8* rec, +double CalcSSIM(const uint8_t* org, + const uint8_t* rec, const int image_width, const int image_height); diff --git a/chromium/third_party/libyuv/util/yuvconvert.cc b/chromium/third_party/libyuv/util/yuvconvert.cc index bc01d9ff503..27cdfe9e375 100644 --- a/chromium/third_party/libyuv/util/yuvconvert.cc +++ b/chromium/third_party/libyuv/util/yuvconvert.cc @@ -37,7 +37,7 @@ int num_skip_org = 0; // Number of frames to skip in original. int num_frames = 0; // Number of frames to convert. int filter = 1; // Bilinear filter for scaling. -static __inline uint32 Abs(int32 v) { +static __inline uint32_t Abs(int32_t v) { return v >= 0 ? 
v : -v;
}

@@ -79,8 +79,9 @@ void PrintHelp(const char* program) {
}

void ParseOptions(int argc, const char* argv[]) {
-  if (argc <= 1)
+  if (argc <= 1) {
    PrintHelp(argv[0]);
+  }
  for (int c = 1; c < argc; ++c) {
    if (!strcmp(argv[c], "-v")) {
      verbose = true;
@@ -158,11 +159,11 @@ void ParseOptions(int argc, const char* argv[]) {
static const int kTileX = 32;
static const int kTileY = 32;

-static int TileARGBScale(const uint8* src_argb,
+static int TileARGBScale(const uint8_t* src_argb,
                         int src_stride_argb,
                         int src_width,
                         int src_height,
-                         uint8* dst_argb,
+                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int dst_width,
                         int dst_height,
@@ -242,9 +243,9 @@ int main(int argc, const char* argv[]) {
  fseek(file_org, num_skip_org * total_size, SEEK_SET);
#endif

-  uint8* const ch_org = new uint8[org_size];
-  uint8* const ch_dst = new uint8[dst_size];
-  uint8* const ch_rec = new uint8[total_size];
+  uint8_t* const ch_org = new uint8_t[org_size];
+  uint8_t* const ch_dst = new uint8_t[dst_size];
+  uint8_t* const ch_rec = new uint8_t[total_size];
  if (ch_org == NULL || ch_rec == NULL) {
    fprintf(stderr, "No memory available\n");
    fclose(file_org);
@@ -265,14 +266,16 @@ int main(int argc, const char* argv[]) {

  int number_of_frames;
  for (number_of_frames = 0;; ++number_of_frames) {
-    if (num_frames && number_of_frames >= num_frames)
+    if (num_frames && number_of_frames >= num_frames) {
      break;
+    }

    // Load original YUV or ARGB frame.
    size_t bytes_org =
-        fread(ch_org, sizeof(uint8), static_cast<size_t>(org_size), file_org);
-    if (bytes_org < static_cast<size_t>(org_size))
+        fread(ch_org, sizeof(uint8_t), static_cast<size_t>(org_size), file_org);
+    if (bytes_org < static_cast<size_t>(org_size)) {
      break;
+    }

    // TODO(fbarchard): Attenuate doesnt need to know dimensions.
    // ARGB attenuate frame
@@ -329,16 +332,18 @@ int main(int argc, const char* argv[]) {
      // Output YUV or ARGB frame.
      if (rec_is_yuv) {
        size_t bytes_rec =
-            fwrite(ch_rec, sizeof(uint8), static_cast<size_t>(total_size),
+            fwrite(ch_rec, sizeof(uint8_t), static_cast<size_t>(total_size),
                   file_rec[cur_rec]);
-        if (bytes_rec < static_cast<size_t>(total_size))
+        if (bytes_rec < static_cast<size_t>(total_size)) {
          break;
+        }
      } else {
        size_t bytes_rec =
-            fwrite(ch_dst, sizeof(uint8), static_cast<size_t>(dst_size),
+            fwrite(ch_dst, sizeof(uint8_t), static_cast<size_t>(dst_size),
                   file_rec[cur_rec]);
-        if (bytes_rec < static_cast<size_t>(dst_size))
+        if (bytes_rec < static_cast<size_t>(dst_size)) {
          break;
+        }
      }
      if (verbose) {
        printf("%5d", number_of_frames);
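
A note on the FourCC handling exercised by the unit_test.cc changes above: a FourCC packs four ASCII characters into one 32-bit value, least-significant byte first, and CanonicalFourCC folds vendor aliases (IYUV, YU12, HDYC, ...) onto one canonical code. A minimal self-contained sketch of the packing and the per-byte validity rule the test applies; MakeFourCC and ValidFourCC are hypothetical helpers for illustration, not libyuv's actual FOURCC macros:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Hypothetical helper: pack four characters little-endian, byte 0 first,
// the same layout TestValidFourCC decodes with its shifts above.
constexpr uint32_t MakeFourCC(char a, char b, char c, char d) {
  return static_cast<uint32_t>(a) | (static_cast<uint32_t>(b) << 8) |
         (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24);
}

// Per-byte rule from TestValidChar: alphanumeric, space, or 0xff padding.
bool ValidChar(uint32_t c) {
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
         (c >= 'a' && c <= 'z') || c == ' ' || c == 0xff;
}

bool ValidFourCC(uint32_t fourcc) {
  return ValidChar(fourcc & 0xff) && ValidChar((fourcc >> 8) & 0xff) &&
         ValidChar((fourcc >> 16) & 0xff) && ValidChar((fourcc >> 24) & 0xff);
}

int main() {
  const uint32_t kI420 = MakeFourCC('I', '4', '2', '0');
  printf("I420 = 0x%08" PRIX32 ", valid = %d\n", kI420, ValidFourCC(kI420));
  return 0;
}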
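The compare.cc tool above seeds both running hashes with 5381 and feeds each 32 KB block back through libyuv::HashDjb2, so hashing block-by-block yields the same result as hashing the whole file in one pass. A portable sketch of that recurrence (the classic djb2, hash = hash * 33 + byte; treating this as exactly equivalent to libyuv's accelerated HashDjb2 is an assumption):

#include <cstddef>
#include <cstdint>

// djb2: start from the seed (5381 for a fresh stream) and fold in each
// byte. Chaining the previous result as the next seed is what makes the
// block-by-block loop in compare.cc equivalent to a single-pass hash.
uint32_t HashDjb2C(const uint8_t* data, size_t len, uint32_t seed) {
  uint32_t hash = seed;
  for (size_t i = 0; i < len; ++i) {
    hash = hash * 33u + data[i];
  }
  return hash;
}

Usage mirrors the tool: initialize uint32_t hash = 5381; then call hash = HashDjb2C(buf, amt, hash); once per block read.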
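psnr.cc computes a sum of squared errors with a portable C fallback plus optional NEON/SSE2 variants, then converts the accumulated total to decibels. The two steps reduced to standalone code; PsnrFromSse is a hypothetical name, but the formula and the 128 dB cap are taken directly from the ComputePSNR hunk above:

#include <cmath>
#include <cstdint>

// Portable sum of squared errors over two equal-length buffers, the same
// arithmetic as SumSquareError_C above (accumulated here in 64 bits).
uint64_t SumSquareErrorC(const uint8_t* a, const uint8_t* b, int count) {
  uint64_t sse = 0;
  for (int i = 0; i < count; ++i) {
    const int diff = a[i] - b[i];
    sse += static_cast<uint64_t>(diff * diff);
  }
  return sse;
}

// PSNR from the accumulated SSE, clamped so a perfect match reports the
// tool's 128 dB ceiling (kMaxPSNR in psnr.h).
double PsnrFromSse(double sse, double size) {
  const double kMaxPSNR = 128.0;
  const double min_sse = 255.0 * 255.0 * size / std::pow(10.0, kMaxPSNR / 10.0);
  if (sse <= min_sse) {
    sse = min_sse;  // Produces the maximum PSNR of 128.
  }
  return 10.0 * std::log10(255.0 * 255.0 * size / sse);
}

On the dispatch pattern: ComputeSumSquareError keeps a function pointer defaulting to the C routine and swaps in SumSquareError_NEON or SumSquareError_SSE2 when the CPU supports it, then sums in 1 << 15 byte blocks (optionally with an OpenMP reduction). The block size keeps each 32-bit partial sum safe: 32768 * 255 * 255 is about 2.13e9, just under the 32-bit limit.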
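The ScaleY helper in psnr_main.cc expands studio-range luma before comparison: (y - 16) * 256 / 224 maps 16..240 onto 0..255, with clamping for out-of-range samples (240 lands on 256 and is clamped back to 255). As a standalone function with the same arithmetic, under a hypothetical name:

#include <cstdint>

// Map studio-range Y (16..240) to full range (0..255), clamping outliers.
uint8_t ScaleYFullRange(uint8_t y) {
  int ny = (y - 16) * 256 / 224;
  if (ny < 0) {
    ny = 0;
  }
  if (ny > 255) {
    ny = 255;
  }
  return static_cast<uint8_t>(ny);
}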
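Finally, ssim.cc's FinalizeSSIM turns the Gaussian-weighted kernel sums into means, variances, and a covariance, clamps the tiny negative variances that rounding can produce, and applies the SSIM formula. A sketch of the textbook form those statistics feed; kC1 matches the C11 constant visible in the hunk ((0.01 * 255)^2), while the 0.03 factor in kC2 is the conventional stabilizer and an assumption here, not read from the diff:

// Textbook SSIM for a single window, given the two window means (mx, my),
// variances (sxx, syy), and covariance (sxy). FinalizeSSIM above derives
// the same statistics from its weighted sums before applying the formula.
double SsimIndex(double mx, double my, double sxx, double syy, double sxy) {
  const double kC1 = (0.01 * 255.0) * (0.01 * 255.0);
  const double kC2 = (0.03 * 255.0) * (0.03 * 255.0);  // conventional choice
  return ((2.0 * mx * my + kC1) * (2.0 * sxy + kC2)) /
         ((mx * mx + my * my + kC1) * (sxx + syy + kC2));
}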