author    Allan Sandfeld Jensen <allan.jensen@qt.io>  2018-05-03 13:42:47 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io>  2018-05-15 10:27:51 +0000
commit    8c5c43c7b138c9b4b0bf56d946e61d3bbc111bec (patch)
tree      d29d987c4d7b173cf853279b79a51598f104b403 /chromium/third_party/libyuv
parent    830c9e163d31a9180fadca926b3e1d7dfffb5021 (diff)
BASELINE: Update Chromium to 66.0.3359.156
Change-Id: I0c9831ad39911a086b6377b16f995ad75a51e441
Reviewed-by: Michal Klocek <michal.klocek@qt.io>
Diffstat (limited to 'chromium/third_party/libyuv')
-rw-r--r--  chromium/third_party/libyuv/BUILD.gn | 30
-rw-r--r--  chromium/third_party/libyuv/DEPS | 4
-rw-r--r--  chromium/third_party/libyuv/README.chromium | 2
-rw-r--r--  chromium/third_party/libyuv/docs/deprecated_builds.md | 4
-rw-r--r--  chromium/third_party/libyuv/docs/formats.md | 16
-rw-r--r--  chromium/third_party/libyuv/docs/getting_started.md | 6
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/basic_types.h | 98
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/compare.h | 63
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/compare_row.h | 61
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/convert.h | 243
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/convert_argb.h | 372
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/convert_from.h | 240
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/convert_from_argb.h | 137
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/cpu_id.h | 2
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/macros_msa.h | 108
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h | 26
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/planar_functions.h | 459
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/rotate.h | 78
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/rotate_argb.h | 4
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/rotate_row.h | 117
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/row.h | 3902
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/scale.h | 55
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/scale_argb.h | 20
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/scale_row.h | 651
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/version.h | 2
-rw-r--r--  chromium/third_party/libyuv/include/libyuv/video_common.h | 18
-rw-r--r--  chromium/third_party/libyuv/source/compare.cc | 152
-rw-r--r--  chromium/third_party/libyuv/source/compare_common.cc | 32
-rw-r--r--  chromium/third_party/libyuv/source/compare_gcc.cc | 209
-rw-r--r--  chromium/third_party/libyuv/source/compare_msa.cc | 20
-rw-r--r--  chromium/third_party/libyuv/source/compare_neon.cc | 12
-rw-r--r--  chromium/third_party/libyuv/source/compare_neon64.cc | 12
-rw-r--r--  chromium/third_party/libyuv/source/compare_win.cc | 26
-rw-r--r--  chromium/third_party/libyuv/source/convert.cc | 365
-rw-r--r--  chromium/third_party/libyuv/source/convert_argb.cc | 613
-rw-r--r--  chromium/third_party/libyuv/source/convert_from.cc | 431
-rw-r--r--  chromium/third_party/libyuv/source/convert_from_argb.cc | 284
-rw-r--r--  chromium/third_party/libyuv/source/convert_jpeg.cc | 41
-rw-r--r--  chromium/third_party/libyuv/source/convert_to_argb.cc | 114
-rw-r--r--  chromium/third_party/libyuv/source/convert_to_i420.cc | 181
-rw-r--r--  chromium/third_party/libyuv/source/cpu_id.cc | 6
-rw-r--r--  chromium/third_party/libyuv/source/mjpeg_decoder.cc | 28
-rw-r--r--  chromium/third_party/libyuv/source/mjpeg_validate.cc | 10
-rw-r--r--  chromium/third_party/libyuv/source/planar_functions.cc | 615
-rw-r--r--  chromium/third_party/libyuv/source/rotate.cc | 90
-rw-r--r--  chromium/third_party/libyuv/source/rotate_any.cc | 9
-rw-r--r--  chromium/third_party/libyuv/source/rotate_argb.cc | 52
-rw-r--r--  chromium/third_party/libyuv/source/rotate_common.cc | 20
-rw-r--r--  chromium/third_party/libyuv/source/rotate_gcc.cc | 14
-rw-r--r--  chromium/third_party/libyuv/source/rotate_msa.cc | 24
-rw-r--r--  chromium/third_party/libyuv/source/rotate_neon.cc | 14
-rw-r--r--  chromium/third_party/libyuv/source/rotate_neon64.cc | 16
-rw-r--r--  chromium/third_party/libyuv/source/rotate_win.cc | 10
-rw-r--r--  chromium/third_party/libyuv/source/row_any.cc | 429
-rw-r--r--  chromium/third_party/libyuv/source/row_common.cc | 1547
-rw-r--r--  chromium/third_party/libyuv/source/row_gcc.cc | 8817
-rw-r--r--  chromium/third_party/libyuv/source/row_msa.cc | 539
-rw-r--r--  chromium/third_party/libyuv/source/row_neon.cc | 430
-rw-r--r--  chromium/third_party/libyuv/source/row_neon64.cc | 474
-rw-r--r--  chromium/third_party/libyuv/source/row_win.cc | 758
-rw-r--r--  chromium/third_party/libyuv/source/scale.cc | 291
-rw-r--r--  chromium/third_party/libyuv/source/scale_any.cc | 92
-rw-r--r--  chromium/third_party/libyuv/source/scale_argb.cc | 114
-rw-r--r--  chromium/third_party/libyuv/source/scale_common.cc | 362
-rw-r--r--  chromium/third_party/libyuv/source/scale_gcc.cc | 1972
-rw-r--r--  chromium/third_party/libyuv/source/scale_msa.cc | 44
-rw-r--r--  chromium/third_party/libyuv/source/scale_neon.cc | 102
-rw-r--r--  chromium/third_party/libyuv/source/scale_neon64.cc | 142
-rw-r--r--  chromium/third_party/libyuv/source/scale_win.cc | 112
-rw-r--r--  chromium/third_party/libyuv/source/video_common.cc | 13
-rw-r--r--  chromium/third_party/libyuv/unit_test/basictypes_test.cc | 33
-rw-r--r--  chromium/third_party/libyuv/unit_test/color_test.cc | 34
-rw-r--r--  chromium/third_party/libyuv/unit_test/compare_test.cc | 71
-rw-r--r--  chromium/third_party/libyuv/unit_test/convert_test.cc | 511
-rw-r--r--  chromium/third_party/libyuv/unit_test/math_test.cc | 12
-rw-r--r--  chromium/third_party/libyuv/unit_test/planar_test.cc | 232
-rw-r--r--  chromium/third_party/libyuv/unit_test/scale_argb_test.cc | 42
-rw-r--r--  chromium/third_party/libyuv/unit_test/scale_test.cc | 60
-rw-r--r--  chromium/third_party/libyuv/unit_test/unit_test.cc | 10
-rw-r--r--  chromium/third_party/libyuv/unit_test/unit_test.h | 21
-rw-r--r--  chromium/third_party/libyuv/unit_test/video_common_test.cc | 46
-rw-r--r--  chromium/third_party/libyuv/util/compare.cc | 18
-rw-r--r--  chromium/third_party/libyuv/util/psnr.cc | 61
-rw-r--r--  chromium/third_party/libyuv/util/psnr.h | 6
-rw-r--r--  chromium/third_party/libyuv/util/psnr_main.cc | 57
-rw-r--r--  chromium/third_party/libyuv/util/ssim.cc | 39
-rw-r--r--  chromium/third_party/libyuv/util/ssim.h | 6
-rw-r--r--  chromium/third_party/libyuv/util/yuvconvert.cc | 33
88 files changed, 14514 insertions, 13134 deletions
diff --git a/chromium/third_party/libyuv/BUILD.gn b/chromium/third_party/libyuv/BUILD.gn
index 10b5b819a9c..03ce499e6d9 100644
--- a/chromium/third_party/libyuv/BUILD.gn
+++ b/chromium/third_party/libyuv/BUILD.gn
@@ -48,6 +48,7 @@ group("default") {
group("libyuv") {
all_dependent_configs = [ ":libyuv_config" ]
+ deps = []
if (is_win && target_cpu == "x64") {
# Compile with clang in order to get inline assembly
@@ -60,13 +61,19 @@ group("libyuv") {
]
}
+ if (libyuv_use_neon) {
+ deps += [ ":libyuv_neon" ]
+ }
+
+ if (libyuv_use_msa) {
+ deps += [ ":libyuv_msa" ]
+ }
+
if (!is_ios) {
# Make sure that clients of libyuv link with libjpeg. This can't go in
# libyuv_internal because in Windows x64 builds that will generate a clang
# build of libjpeg, and we don't want two copies.
- deps = [
- "//third_party:jpeg",
- ]
+ deps += [ "//third_party:jpeg" ]
}
}
@@ -147,14 +154,6 @@ static_library("libyuv_internal") {
deps += [ "//third_party:jpeg_includes" ]
}
- if (libyuv_use_neon) {
- deps += [ ":libyuv_neon" ]
- }
-
- if (libyuv_use_msa) {
- deps += [ ":libyuv_msa" ]
- }
-
# Always enable optimization for Release and NaCl builds (to workaround
# crbug.com/538243).
if (!is_debug || is_nacl) {
@@ -174,6 +173,7 @@ static_library("libyuv_internal") {
]
}
}
+
if (libyuv_use_neon) {
static_library("libyuv_neon") {
sources = [
@@ -188,6 +188,10 @@ if (libyuv_use_neon) {
"source/scale_neon64.cc",
]
+ deps = [
+ ":libyuv_internal",
+ ]
+
public_configs = [ ":libyuv_config" ]
# Always enable optimization for Release and NaCl builds (to workaround
@@ -217,6 +221,10 @@ if (libyuv_use_msa) {
"source/scale_msa.cc",
]
+ deps = [
+ ":libyuv_internal",
+ ]
+
public_configs = [ ":libyuv_config" ]
}
}
diff --git a/chromium/third_party/libyuv/DEPS b/chromium/third_party/libyuv/DEPS
index 688ff1299c2..ccc701c4f69 100644
--- a/chromium/third_party/libyuv/DEPS
+++ b/chromium/third_party/libyuv/DEPS
@@ -56,6 +56,10 @@ deps = {
'url': Var('chromium_git') + '/chromium/src/base' + '@' + '9b543d487c7c38be191c6180001ff9ce186ae326',
'condition': 'checkout_android',
},
+ 'src/third_party/android_ndk': {
+ 'url': Var('chromium_git') + '/android_ndk.git' + '@' + 'e951c37287c7d8cd915bf8d4149fd4a06d808b55',
+ 'condition': 'checkout_android',
+ },
'src/third_party/android_tools': {
'url': Var('chromium_git') + '/android_tools.git' + '@' + 'aadb2fed04af8606545b0afe4e3060bc1a15fad7',
'condition': 'checkout_android',
diff --git a/chromium/third_party/libyuv/README.chromium b/chromium/third_party/libyuv/README.chromium
index 373e5f75c87..f38414b3a80 100644
--- a/chromium/third_party/libyuv/README.chromium
+++ b/chromium/third_party/libyuv/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1688
+Version: 1698
License: BSD
License File: LICENSE
diff --git a/chromium/third_party/libyuv/docs/deprecated_builds.md b/chromium/third_party/libyuv/docs/deprecated_builds.md
index d54a0282c15..29e0bf9bc30 100644
--- a/chromium/third_party/libyuv/docs/deprecated_builds.md
+++ b/chromium/third_party/libyuv/docs/deprecated_builds.md
@@ -165,11 +165,11 @@ mipsel
arm32 disassembly:
- third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
+ third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
arm64 disassembly:
- third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
Running tests:
diff --git a/chromium/third_party/libyuv/docs/formats.md b/chromium/third_party/libyuv/docs/formats.md
index 3973e5d5ad8..f78f57bb4c4 100644
--- a/chromium/third_party/libyuv/docs/formats.md
+++ b/chromium/third_party/libyuv/docs/formats.md
@@ -50,11 +50,12 @@ The following is extracted from video_common.h as a complete list of formats sup
// 1 Secondary YUV format: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
- // 10 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
+ // 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
+ FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
@@ -139,7 +140,7 @@ There are 2 RGB layouts - RGB24 (aka 24BG) and RAW
RGB24 is B,G,R in memory
RAW is R,G,B in memory
-# AR30
+# AR30 and XR30
AR30 is 2 10 10 10 ARGB stored in little endian order.
The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
@@ -148,3 +149,14 @@ The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
2 - 66%. 10101010b = 0xaa = 170
3 - 100%. 11111111b = 0xff = 255
The 10 bit RGB values range from 0 to 1023.
+XR30 is the same as AR30 but with no alpha channel.
+
+# NV12 and NV21
+
+NV12 is a biplanar format with a full sized Y plane followed by a single
+chroma plane with weaved U and V values.
+NV21 is the same but with weaved V and U values.
+The 12 in NV12 refers to 12 bits per pixel. NV12 has a half width and half
+height chroma channel, and therefore is a 420 subsampling.
+NV16 is 16 bits per pixel, with half width and full height. aka 422.
+NV24 is 24 bits per pixel with full sized chroma channel. aka 444.
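
To make the AR30 layout described in this formats.md hunk concrete, here is a minimal C++ sketch (an illustration, not part of the patch): since the word is little-endian ARGB 2:10:10:10, blue sits in the low 10 bits, and multiplying the 2-bit alpha by 0x55 reproduces the 0/85/170/255 mapping in the table above.

```cpp
#include <cstdint>

// Unpack one AR30 pixel. The little-endian ARGB word puts blue in the low
// 10 bits: B = bits 0-9, G = bits 10-19, R = bits 20-29, A = bits 30-31.
void UnpackAR30(uint32_t ar30, uint8_t* a, uint8_t* r, uint8_t* g, uint8_t* b) {
  uint32_t b10 = ar30 & 0x3ff;          // 10-bit blue, 0..1023
  uint32_t g10 = (ar30 >> 10) & 0x3ff;  // 10-bit green
  uint32_t r10 = (ar30 >> 20) & 0x3ff;  // 10-bit red
  uint32_t a2 = ar30 >> 30;             // 2-bit alpha
  *b = static_cast<uint8_t>(b10 >> 2);  // keep the 8 most significant bits
  *g = static_cast<uint8_t>(g10 >> 2);
  *r = static_cast<uint8_t>(r10 >> 2);
  *a = static_cast<uint8_t>(a2 * 0x55);  // 0->0, 1->85, 2->170, 3->255
}
```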
diff --git a/chromium/third_party/libyuv/docs/getting_started.md b/chromium/third_party/libyuv/docs/getting_started.md
index fefffce4f1f..09297b66a5c 100644
--- a/chromium/third_party/libyuv/docs/getting_started.md
+++ b/chromium/third_party/libyuv/docs/getting_started.md
@@ -138,11 +138,11 @@ mips
arm disassembly:
- third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
- third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
- third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
Running tests:
diff --git a/chromium/third_party/libyuv/include/libyuv/basic_types.h b/chromium/third_party/libyuv/include/libyuv/basic_types.h
index 7d98bb93f0e..01d9dfc7736 100644
--- a/chromium/third_party/libyuv/include/libyuv/basic_types.h
+++ b/chromium/third_party/libyuv/include/libyuv/basic_types.h
@@ -11,79 +11,33 @@
#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
#define INCLUDE_LIBYUV_BASIC_TYPES_H_
-#include <stddef.h> // for NULL, size_t
+#include <stddef.h> // For size_t and NULL
+
+#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
+#define INT_TYPES_DEFINED
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#include <sys/types.h> // for uintptr_t on x86
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
+typedef unsigned int uint32_t;
+typedef int int32_t;
+typedef unsigned short uint16_t;
+typedef short int16_t;
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
#else
-#include <stdint.h> // for uintptr_t
-#endif
-
-#ifndef GG_LONGLONG
-#ifndef INT_TYPES_DEFINED
-#define INT_TYPES_DEFINED
-#ifdef COMPILER_MSVC
-typedef unsigned __int64 uint64;
-typedef __int64 int64;
-#ifndef INT64_C
-#define INT64_C(x) x##I64
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x##UI64
-#endif
-#define INT64_F "I64"
-#else // COMPILER_MSVC
-#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long uint64; // NOLINT
-typedef long int64; // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x##L
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x##UL
-#endif
-#define INT64_F "l"
-#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long long uint64; // NOLINT
-typedef long long int64; // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x##LL
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x##ULL
-#endif
-#define INT64_F "ll"
-#endif // __LP64__
-#endif // COMPILER_MSVC
-typedef unsigned int uint32;
-typedef int int32;
-typedef unsigned short uint16; // NOLINT
-typedef short int16; // NOLINT
-typedef unsigned char uint8;
-typedef signed char int8;
+#include <stdint.h> // for uintptr_t and C99 types
+#endif // defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef uint64_t uint64;
+typedef int64_t int64;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint8_t uint8;
+typedef int8_t int8;
#endif // INT_TYPES_DEFINED
-#endif // GG_LONGLONG
-
-// Detect compiler is for x86 or x64.
-#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
- defined(_M_IX86)
-#define CPU_X86 1
-#endif
-// Detect compiler is for ARM.
-#if defined(__arm__) || defined(_M_ARM)
-#define CPU_ARM 1
-#endif
-
-#ifndef ALIGNP
-#ifdef __cplusplus
-#define ALIGNP(p, t) \
- reinterpret_cast<uint8*>( \
- ((reinterpret_cast<uintptr_t>(p) + ((t)-1)) & ~((t)-1)))
-#else
-#define ALIGNP(p, t) \
- (uint8*)((((uintptr_t)(p) + ((t)-1)) & ~((t)-1))) /* NOLINT */
-#endif
-#endif
#if !defined(LIBYUV_API)
#if defined(_WIN32) || defined(__CYGWIN__)
@@ -103,15 +57,9 @@ typedef signed char int8;
#endif // __GNUC__
#endif // LIBYUV_API
+// TODO(fbarchard): Remove bool macros.
#define LIBYUV_BOOL int
#define LIBYUV_FALSE 0
#define LIBYUV_TRUE 1
-// Visual C x86 or GCC little endian.
-#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
- defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
- (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#define LIBYUV_LITTLE_ENDIAN
-#endif
-
#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_
diff --git a/chromium/third_party/libyuv/include/libyuv/compare.h b/chromium/third_party/libyuv/include/libyuv/compare.h
index a06eff2066f..3353ad71c68 100644
--- a/chromium/third_party/libyuv/include/libyuv/compare.h
+++ b/chromium/third_party/libyuv/include/libyuv/compare.h
@@ -20,80 +20,85 @@ extern "C" {
// Compute a hash for specified memory. Seed of 5381 recommended.
LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
+uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed);
// Hamming Distance
LIBYUV_API
-uint64 ComputeHammingDistance(const uint8* src_a,
- const uint8* src_b,
- int count);
+uint64_t ComputeHammingDistance(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
-uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
+uint32_t ARGBDetect(const uint8_t* argb,
+ int stride_argb,
+ int width,
+ int height);
// Sum Square Error - used to compute Mean Square Error or PSNR.
LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, int count);
+uint64_t ComputeSumSquareError(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a,
- int stride_a,
- const uint8* src_b,
- int stride_b,
- int width,
- int height);
+uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height);
static const int kMaxPsnr = 128;
LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count);
+double SumSquareErrorToPsnr(uint64_t sse, uint64_t count);
LIBYUV_API
-double CalcFramePsnr(const uint8* src_a,
+double CalcFramePsnr(const uint8_t* src_a,
int stride_a,
- const uint8* src_b,
+ const uint8_t* src_b,
int stride_b,
int width,
int height);
LIBYUV_API
-double I420Psnr(const uint8* src_y_a,
+double I420Psnr(const uint8_t* src_y_a,
int stride_y_a,
- const uint8* src_u_a,
+ const uint8_t* src_u_a,
int stride_u_a,
- const uint8* src_v_a,
+ const uint8_t* src_v_a,
int stride_v_a,
- const uint8* src_y_b,
+ const uint8_t* src_y_b,
int stride_y_b,
- const uint8* src_u_b,
+ const uint8_t* src_u_b,
int stride_u_b,
- const uint8* src_v_b,
+ const uint8_t* src_v_b,
int stride_v_b,
int width,
int height);
LIBYUV_API
-double CalcFrameSsim(const uint8* src_a,
+double CalcFrameSsim(const uint8_t* src_a,
int stride_a,
- const uint8* src_b,
+ const uint8_t* src_b,
int stride_b,
int width,
int height);
LIBYUV_API
-double I420Ssim(const uint8* src_y_a,
+double I420Ssim(const uint8_t* src_y_a,
int stride_y_a,
- const uint8* src_u_a,
+ const uint8_t* src_u_a,
int stride_u_a,
- const uint8* src_v_a,
+ const uint8_t* src_v_a,
int stride_v_a,
- const uint8* src_y_b,
+ const uint8_t* src_y_b,
int stride_y_b,
- const uint8* src_u_b,
+ const uint8_t* src_u_b,
int stride_u_b,
- const uint8* src_v_b,
+ const uint8_t* src_v_b,
int stride_v_b,
int width,
int height);
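
The compare.h changes above are mechanical uint8 -> uint8_t renames, so a short usage sketch may help orient readers. This hypothetical helper (not part of the patch) chains ComputeSumSquareErrorPlane and SumSquareErrorToPsnr, both declared above; HashDjb2 is called the same way with the recommended seed of 5381.

```cpp
#include <cstdint>
#include "libyuv/compare.h"

// Hypothetical helper: PSNR between two 8-bit planes of equal dimensions.
double PlanePsnr(const uint8_t* a, int stride_a,
                 const uint8_t* b, int stride_b,
                 int width, int height) {
  uint64_t sse = libyuv::ComputeSumSquareErrorPlane(a, stride_a, b, stride_b,
                                                    width, height);
  // SumSquareErrorToPsnr computes 10 * log10(255^2 * count / sse) and
  // returns kMaxPsnr (128) when sse is 0 (identical planes).
  return libyuv::SumSquareErrorToPsnr(
      sse, static_cast<uint64_t>(width) * static_cast<uint64_t>(height));
}
```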
diff --git a/chromium/third_party/libyuv/include/libyuv/compare_row.h b/chromium/third_party/libyuv/include/libyuv/compare_row.h
index 2e5ebe508d1..72ee740600a 100644
--- a/chromium/third_party/libyuv/include/libyuv/compare_row.h
+++ b/chromium/third_party/libyuv/include/libyuv/compare_row.h
@@ -18,17 +18,20 @@ namespace libyuv {
extern "C" {
#endif
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
-
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
_MSC_VER >= 1700
@@ -87,22 +90,44 @@ extern "C" {
#define HAS_SUMSQUAREERROR_MSA
#endif
-uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count);
-uint32 HammingDistance_SSE42(const uint8* src_a, const uint8* src_b, int count);
-uint32 HammingDistance_SSSE3(const uint8* src_a, const uint8* src_b, int count);
-uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count);
-uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count);
-uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count);
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
-uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count);
-
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
+uint32_t HammingDistance_C(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t HammingDistance_AVX2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t HammingDistance_MSA(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+
+uint32_t SumSquareError_C(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t SumSquareError_SSE2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t SumSquareError_AVX2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+uint32_t SumSquareError_MSA(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+
+uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
+uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
+uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libyuv/include/libyuv/convert.h b/chromium/third_party/libyuv/include/libyuv/convert.h
index d310f8493bf..d12ef24f799 100644
--- a/chromium/third_party/libyuv/include/libyuv/convert.h
+++ b/chromium/third_party/libyuv/include/libyuv/convert.h
@@ -27,34 +27,34 @@ extern "C" {
// Convert I444 to I420.
LIBYUV_API
-int I444ToI420(const uint8* src_y,
+int I444ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I422 to I420.
LIBYUV_API
-int I422ToI420(const uint8* src_y,
+int I422ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
@@ -62,17 +62,17 @@ int I422ToI420(const uint8* src_y,
// Copy I420 to I420.
#define I420ToI420 I420Copy
LIBYUV_API
-int I420Copy(const uint8* src_y,
+int I420Copy(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
@@ -81,17 +81,17 @@ int I420Copy(const uint8* src_y,
#define I010ToI010 I010Copy
#define H010ToH010 I010Copy
LIBYUV_API
-int I010Copy(const uint16* src_y,
+int I010Copy(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
- uint16* dst_u,
+ uint16_t* dst_u,
int dst_stride_u,
- uint16* dst_v,
+ uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
@@ -99,30 +99,30 @@ int I010Copy(const uint16* src_y,
// Convert 10 bit YUV to 8 bit
#define H010ToH420 I010ToI420
LIBYUV_API
-int I010ToI420(const uint16* src_y,
+int I010ToI420(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I400 (grey) to I420.
LIBYUV_API
-int I400ToI420(const uint8* src_y,
+int I400ToI420(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
@@ -131,204 +131,204 @@ int I400ToI420(const uint8* src_y,
// Convert NV12 to I420.
LIBYUV_API
-int NV12ToI420(const uint8* src_y,
+int NV12ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert NV21 to I420.
LIBYUV_API
-int NV21ToI420(const uint8* src_y,
+int NV21ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_vu,
+ const uint8_t* src_vu,
int src_stride_vu,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert YUY2 to I420.
LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2,
+int YUY2ToI420(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert UYVY to I420.
LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy,
+int UYVYToI420(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert M420 to I420.
LIBYUV_API
-int M420ToI420(const uint8* src_m420,
+int M420ToI420(const uint8_t* src_m420,
int src_stride_m420,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert Android420 to I420.
LIBYUV_API
-int Android420ToI420(const uint8* src_y,
+int Android420ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- int pixel_stride_uv,
- uint8* dst_y,
+ int src_pixel_stride_uv,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
-int ARGBToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int ARGBToI420(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// BGRA little endian (argb in memory) to I420.
LIBYUV_API
-int BGRAToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int BGRAToI420(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// ABGR little endian (rgba in memory) to I420.
LIBYUV_API
-int ABGRToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int ABGRToI420(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGBA little endian (abgr in memory) to I420.
LIBYUV_API
-int RGBAToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int RGBAToI420(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB little endian (bgr in memory) to I420.
LIBYUV_API
-int RGB24ToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int RGB24ToI420(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB big endian (rgb in memory) to I420.
LIBYUV_API
-int RAWToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int RAWToI420(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
-int RGB565ToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int RGB565ToI420(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB15 (RGBO fourcc) little endian to I420.
LIBYUV_API
-int ARGB1555ToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int ARGB1555ToI420(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB12 (R444 fourcc) little endian to I420.
LIBYUV_API
-int ARGB4444ToI420(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_y,
+int ARGB4444ToI420(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
@@ -337,13 +337,13 @@ int ARGB4444ToI420(const uint8* src_frame,
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
-int MJPGToI420(const uint8* sample,
+int MJPGToI420(const uint8_t* sample,
size_t sample_size,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int src_width,
int src_height,
@@ -352,7 +352,10 @@ int MJPGToI420(const uint8* sample,
// Query size of MJPG in pixels.
LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height);
+int MJPGSize(const uint8_t* sample,
+ size_t sample_size,
+ int* width,
+ int* height);
#endif
// Convert camera sample to I420 with cropping, rotation and vertical flip.
@@ -375,16 +378,16 @@ int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height);
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
+// "fourcc" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
-int ConvertToI420(const uint8* src_frame,
- size_t src_size,
- uint8* dst_y,
+int ConvertToI420(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int crop_x,
int crop_y,
@@ -393,7 +396,7 @@ int ConvertToI420(const uint8* src_frame,
int crop_width,
int crop_height,
enum RotationMode rotation,
- uint32 format);
+ uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
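
As a usage sketch for the ConvertToI420 entry point renamed above (a hypothetical helper with made-up buffers, assuming even dimensions and tightly packed destination planes), a full-frame YUY2 conversion with no crop and no rotation looks like this; the src_width/src_height parameters sit between crop_y and crop_width in the unchanged lines the hunk elides.

```cpp
#include <cstdint>
#include "libyuv/convert.h"
#include "libyuv/video_common.h"  // FOURCC_YUY2

// Hypothetical helper: full-frame YUY2 -> I420, no crop, no rotation.
int Yuy2FrameToI420(const uint8_t* sample, int width, int height,
                    uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v) {
  const size_t sample_size =
      static_cast<size_t>(width) * static_cast<size_t>(height) * 2;  // 2 B/px
  return libyuv::ConvertToI420(sample, sample_size,
                               dst_y, width,      // Y plane and stride
                               dst_u, width / 2,  // U plane, half-width stride
                               dst_v, width / 2,  // V plane, half-width stride
                               0, 0,              // crop_x, crop_y
                               width, height,     // src_width, src_height
                               width, height,     // crop_width, crop_height
                               libyuv::kRotate0, libyuv::FOURCC_YUY2);
}
```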
diff --git a/chromium/third_party/libyuv/include/libyuv/convert_argb.h b/chromium/third_party/libyuv/include/libyuv/convert_argb.h
index b8b57cb12b3..cd4a611de51 100644
--- a/chromium/third_party/libyuv/include/libyuv/convert_argb.h
+++ b/chromium/third_party/libyuv/include/libyuv/convert_argb.h
@@ -30,167 +30,167 @@ extern "C" {
// Copy ARGB to ARGB.
LIBYUV_API
-int ARGBCopy(const uint8* src_argb,
+int ARGBCopy(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I420 to ARGB.
LIBYUV_API
-int I420ToARGB(const uint8* src_y,
+int I420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Duplicate prototype for function in convert_from.h for remoting.
LIBYUV_API
-int I420ToABGR(const uint8* src_y,
+int I420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I010 to ARGB.
LIBYUV_API
-int I010ToARGB(const uint16* src_y,
+int I010ToARGB(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I010 to ARGB.
LIBYUV_API
-int I010ToARGB(const uint16* src_y,
+int I010ToARGB(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I010 to ABGR.
LIBYUV_API
-int I010ToABGR(const uint16* src_y,
+int I010ToABGR(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H010 to ARGB.
LIBYUV_API
-int H010ToARGB(const uint16* src_y,
+int H010ToARGB(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H010 to ABGR.
LIBYUV_API
-int H010ToABGR(const uint16* src_y,
+int H010ToABGR(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I422 to ARGB.
LIBYUV_API
-int I422ToARGB(const uint8* src_y,
+int I422ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I444 to ARGB.
LIBYUV_API
-int I444ToARGB(const uint8* src_y,
+int I444ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J444 to ARGB.
LIBYUV_API
-int J444ToARGB(const uint8* src_y,
+int J444ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I444 to ABGR.
LIBYUV_API
-int I444ToABGR(const uint8* src_y,
+int I444ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I420 with Alpha to preattenuated ARGB.
LIBYUV_API
-int I420AlphaToARGB(const uint8* src_y,
+int I420AlphaToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- const uint8* src_a,
+ const uint8_t* src_a,
int src_stride_a,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
@@ -198,15 +198,15 @@ int I420AlphaToARGB(const uint8* src_y,
// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
-int I420AlphaToABGR(const uint8* src_y,
+int I420AlphaToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- const uint8* src_a,
+ const uint8_t* src_a,
int src_stride_a,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height,
@@ -214,18 +214,18 @@ int I420AlphaToABGR(const uint8* src_y,
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
-int I400ToARGB(const uint8* src_y,
+int I400ToARGB(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J400 (jpeg grey) to ARGB.
LIBYUV_API
-int J400ToARGB(const uint8* src_y,
+int J400ToARGB(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
@@ -235,227 +235,266 @@ int J400ToARGB(const uint8* src_y,
// Convert NV12 to ARGB.
LIBYUV_API
-int NV12ToARGB(const uint8* src_y,
+int NV12ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert NV21 to ARGB.
LIBYUV_API
-int NV21ToARGB(const uint8* src_y,
+int NV21ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_vu,
+ const uint8_t* src_vu,
int src_stride_vu,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert NV12 to ABGR.
-int NV12ToABGR(const uint8* src_y,
+int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert NV21 to ABGR.
LIBYUV_API
-int NV21ToABGR(const uint8* src_y,
+int NV21ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
- int src_stride_uv,
- uint8* dst_abgr,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert M420 to ARGB.
LIBYUV_API
-int M420ToARGB(const uint8* src_m420,
+int M420ToARGB(const uint8_t* src_m420,
int src_stride_m420,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert YUY2 to ARGB.
LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2,
+int YUY2ToARGB(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert UYVY to ARGB.
LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy,
+int UYVYToARGB(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J420 to ARGB.
LIBYUV_API
-int J420ToARGB(const uint8* src_y,
+int J420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J422 to ARGB.
LIBYUV_API
-int J422ToARGB(const uint8* src_y,
+int J422ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J420 to ABGR.
LIBYUV_API
-int J420ToABGR(const uint8* src_y,
+int J420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert J422 to ABGR.
LIBYUV_API
-int J422ToABGR(const uint8* src_y,
+int J422ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H420 to ARGB.
LIBYUV_API
-int H420ToARGB(const uint8* src_y,
+int H420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H422 to ARGB.
LIBYUV_API
-int H422ToARGB(const uint8* src_y,
+int H422ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H420 to ABGR.
LIBYUV_API
-int H420ToABGR(const uint8* src_y,
+int H420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H422 to ABGR.
LIBYUV_API
-int H422ToABGR(const uint8* src_y,
+int H422ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H010 to ARGB.
LIBYUV_API
-int H010ToARGB(const uint16* src_y,
+int H010ToARGB(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
+// Convert I010 to AR30.
+LIBYUV_API
+int I010ToAR30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
// Convert H010 to AR30.
LIBYUV_API
-int H010ToAR30(const uint16* src_y,
+int H010ToAR30(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
+// Convert I010 to AB30.
+LIBYUV_API
+int I010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height);
+
+// Convert H010 to AB30.
+LIBYUV_API
+int H010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height);
+
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
-int BGRAToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int BGRAToARGB(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// ABGR little endian (rgba in memory) to ARGB.
LIBYUV_API
-int ABGRToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int ABGRToARGB(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGBA little endian (abgr in memory) to ARGB.
LIBYUV_API
-int RGBAToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int RGBAToARGB(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
@@ -465,65 +504,76 @@ int RGBAToARGB(const uint8* src_frame,
// RGB little endian (bgr in memory) to ARGB.
LIBYUV_API
-int RGB24ToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int RGB24ToARGB(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB big endian (rgb in memory) to ARGB.
LIBYUV_API
-int RAWToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int RAWToARGB(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to ARGB.
LIBYUV_API
-int RGB565ToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int RGB565ToARGB(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB15 (RGBO fourcc) little endian to ARGB.
LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int ARGB1555ToARGB(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB12 (R444 fourcc) little endian to ARGB.
LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_frame,
- int src_stride_frame,
- uint8* dst_argb,
+int ARGB4444ToARGB(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AR30 To ARGB.
LIBYUV_API
-int AR30ToARGB(const uint8* src_ar30,
+int AR30ToARGB(const uint8_t* src_ar30,
int src_stride_ar30,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
+#define AB30ToABGR
+
+// Convert AR30 To ABGR.
+LIBYUV_API
+int AR30ToABGR(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
#ifdef HAVE_JPEG
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API
-int MJPGToARGB(const uint8* sample,
+int MJPGToARGB(const uint8_t* sample,
size_t sample_size,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int src_width,
int src_height,
@@ -533,34 +583,34 @@ int MJPGToARGB(const uint8* sample,
// Convert Android420 to ARGB.
LIBYUV_API
-int Android420ToARGB(const uint8* src_y,
+int Android420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert Android420 to ABGR.
LIBYUV_API
-int Android420ToABGR(const uint8* src_y,
+int Android420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
+// "sample_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
// Normally this would be the same as dst_width, with recommended alignment
// to 16 bytes for better efficiency.
@@ -579,12 +629,12 @@ int Android420ToABGR(const uint8* src_y,
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
+// "fourcc" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
-int ConvertToARGB(const uint8* src_frame,
- size_t src_size,
- uint8* dst_argb,
+int ConvertToARGB(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_argb,
int dst_stride_argb,
int crop_x,
int crop_y,
@@ -593,7 +643,7 @@ int ConvertToARGB(const uint8* src_frame,
int crop_width,
int crop_height,
enum RotationMode rotation,
- uint32 format);
+ uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
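
The new 10-bit entry points added to convert_argb.h above compose end to end. As a hedged sketch (the helper name and packing assumptions are mine, not from the patch), an H010 frame can go straight to AR30 without dropping to 8 bits in between:

```cpp
#include <cstdint>
#include "libyuv/convert_argb.h"

// Hypothetical helper: one H010 frame (10-bit 4:2:0, BT.709) straight to
// AR30, keeping 10-bit depth. Assumes tightly packed planes; the 16-bit
// source strides are in uint16_t units, the destination stride in bytes.
int H010FrameToAR30(const uint16_t* src_y, const uint16_t* src_u,
                    const uint16_t* src_v, int width, int height,
                    uint8_t* dst_ar30) {
  return libyuv::H010ToAR30(src_y, width,         // Y stride (uint16_t units)
                            src_u, width / 2,     // U stride, half width
                            src_v, width / 2,     // V stride, half width
                            dst_ar30, width * 4,  // 4 bytes per AR30 pixel
                            width, height);
}
```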
diff --git a/chromium/third_party/libyuv/include/libyuv/convert_from.h b/chromium/third_party/libyuv/include/libyuv/convert_from.h
index b5a422903a5..5cd8a4bfc04 100644
--- a/chromium/third_party/libyuv/include/libyuv/convert_from.h
+++ b/chromium/third_party/libyuv/include/libyuv/convert_from.h
@@ -23,231 +23,231 @@ extern "C" {
// Convert 8 bit YUV to 10 bit.
#define H420ToH010 I420ToI010
-int I420ToI010(const uint8* src_y,
+int I420ToI010(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
- uint16* dst_u,
+ uint16_t* dst_u,
int dst_stride_u,
- uint16* dst_v,
+ uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
LIBYUV_API
-int I420ToI422(const uint8* src_y,
+int I420ToI422(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
LIBYUV_API
-int I420ToI444(const uint8* src_y,
+int I420ToI444(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
LIBYUV_API
-int I400Copy(const uint8* src_y,
+int I400Copy(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
LIBYUV_API
-int I420ToNV12(const uint8* src_y,
+int I420ToNV12(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
LIBYUV_API
-int I420ToNV21(const uint8* src_y,
+int I420ToNV21(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_vu,
+ uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
LIBYUV_API
-int I420ToYUY2(const uint8* src_y,
+int I420ToYUY2(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
int width,
int height);
LIBYUV_API
-int I420ToUYVY(const uint8* src_y,
+int I420ToUYVY(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
int width,
int height);
LIBYUV_API
-int I420ToARGB(const uint8* src_y,
+int I420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
LIBYUV_API
-int I420ToBGRA(const uint8* src_y,
+int I420ToBGRA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
- int dst_stride_argb,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
int width,
int height);
LIBYUV_API
-int I420ToABGR(const uint8* src_y,
+int I420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
- int dst_stride_argb,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
int width,
int height);
LIBYUV_API
-int I420ToRGBA(const uint8* src_y,
+int I420ToRGBA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height);
LIBYUV_API
-int I420ToRGB24(const uint8* src_y,
+int I420ToRGB24(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
int width,
int height);
LIBYUV_API
-int I420ToRAW(const uint8* src_y,
+int I420ToRAW(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
int width,
int height);
LIBYUV_API
-int H420ToRGB24(const uint8* src_y,
+int H420ToRGB24(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
int width,
int height);
LIBYUV_API
-int H420ToRAW(const uint8* src_y,
+int H420ToRAW(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
int width,
int height);
LIBYUV_API
-int I420ToRGB565(const uint8* src_y,
+int I420ToRGB565(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
int width,
int height);
LIBYUV_API
-int I422ToRGB565(const uint8* src_y,
+int I422ToRGB565(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
int width,
int height);
@@ -256,50 +256,64 @@ int I422ToRGB565(const uint8* src_y,
// The dither matrix is ordered with the first byte at the upper left.
LIBYUV_API
-int I420ToRGB565Dither(const uint8* src_y,
+int I420ToRGB565Dither(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
- const uint8* dither4x4,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const uint8_t* dither4x4,
int width,
int height);
LIBYUV_API
-int I420ToARGB1555(const uint8* src_y,
+int I420ToARGB1555(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_argb1555,
+ int dst_stride_argb1555,
int width,
int height);
LIBYUV_API
-int I420ToARGB4444(const uint8* src_y,
+int I420ToARGB4444(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_argb4444,
+ int dst_stride_argb4444,
int width,
int height);
+
// Convert I420 to AR30.
LIBYUV_API
-int I420ToAR30(const uint8* src_y,
+int I420ToAR30(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
+// Convert H420 to AR30.
+LIBYUV_API
+int H420ToAR30(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
@@ -308,17 +322,17 @@ int I420ToAR30(const uint8* src_y,
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
LIBYUV_API
-int ConvertFromI420(const uint8* y,
+int ConvertFromI420(const uint8_t* y,
int y_stride,
- const uint8* u,
+ const uint8_t* u,
int u_stride,
- const uint8* v,
+ const uint8_t* v,
int v_stride,
- uint8* dst_sample,
+ uint8_t* dst_sample,
int dst_sample_stride,
int width,
int height,
- uint32 format);
+ uint32_t fourcc);
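A minimal usage sketch, assuming an I420 frame already split into y/u/v planes and libyuv's FOURCC_ARGB from video_common.h; the buffer and variable names here are illustrative, not part of the header:

// Pack an I420 frame into one contiguous ARGB buffer (4 bytes per pixel).
uint8_t* argb = (uint8_t*)malloc((size_t)width * 4 * height);
int ret = ConvertFromI420(y, y_stride, u, u_stride, v, v_stride,
                          argb, 0,  // stride 0: contiguous destination rows
                          width, height, FOURCC_ARGB);
// ret is 0 on success, negative on invalid arguments.
free(argb);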
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h b/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h
index 4d613502a16..857b46c5611 100644
--- a/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h
+++ b/chromium/third_party/libyuv/include/libyuv/convert_from_argb.h
@@ -21,72 +21,81 @@ extern "C" {
// Copy ARGB to ARGB.
#define ARGBToARGB ARGBCopy
LIBYUV_API
-int ARGBCopy(const uint8* src_argb,
+int ARGBCopy(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert ARGB To BGRA.
LIBYUV_API
-int ARGBToBGRA(const uint8* src_argb,
+int ARGBToBGRA(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_bgra,
+ uint8_t* dst_bgra,
int dst_stride_bgra,
int width,
int height);
// Convert ARGB To ABGR.
LIBYUV_API
-int ARGBToABGR(const uint8* src_argb,
+int ARGBToABGR(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert ARGB To RGBA.
LIBYUV_API
-int ARGBToRGBA(const uint8* src_argb,
+int ARGBToRGBA(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height);
+// Convert ABGR To AR30.
+LIBYUV_API
+int ABGRToAR30(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
// Convert ARGB To AR30.
LIBYUV_API
-int ARGBToAR30(const uint8* src_argb,
+int ARGBToAR30(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert ARGB To RGB24.
LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb,
+int ARGBToRGB24(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgb24,
+ uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert ARGB To RAW.
LIBYUV_API
-int ARGBToRAW(const uint8* src_argb,
+int ARGBToRAW(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgb,
- int dst_stride_rgb,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
int width,
int height);
// Convert ARGB To RGB565.
LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb,
+int ARGBToRGB565(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
@@ -95,173 +104,173 @@ int ARGBToRGB565(const uint8* src_argb,
// Values in dither matrix from 0 to 7 recommended.
// The dither matrix is ordered with the first byte at the upper left.
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
-// const uint8(*dither)[4][4];
+// const uint8_t(*dither)[4][4];
LIBYUV_API
-int ARGBToRGB565Dither(const uint8* src_argb,
+int ARGBToRGB565Dither(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
- const uint8* dither4x4,
+ const uint8_t* dither4x4,
int width,
int height);
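A call sketch with one possible 4x4 ordered-dither table in the recommended 0..7 range; the table values are illustrative, only the first-byte-upper-left layout is prescribed:

// Halved 4x4 Bayer matrix, first byte at the upper left (values 0..7).
static const uint8_t kDither4x4[16] = {0, 4, 1, 5, 6, 2, 7, 3,
                                       1, 5, 0, 4, 7, 3, 6, 2};
ARGBToRGB565Dither(src_argb, src_stride_argb, dst_rgb565, dst_stride_rgb565,
                   kDither4x4, width, height);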
// Convert ARGB To ARGB1555.
LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb,
+int ARGBToARGB1555(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb1555,
+ uint8_t* dst_argb1555,
int dst_stride_argb1555,
int width,
int height);
// Convert ARGB To ARGB4444.
LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb,
+int ARGBToARGB4444(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb4444,
+ uint8_t* dst_argb4444,
int dst_stride_argb4444,
int width,
int height);
// Convert ARGB To I444.
LIBYUV_API
-int ARGBToI444(const uint8* src_argb,
+int ARGBToI444(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB To I422.
LIBYUV_API
-int ARGBToI422(const uint8* src_argb,
+int ARGBToI422(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB To I420. (also in convert.h)
LIBYUV_API
-int ARGBToI420(const uint8* src_argb,
+int ARGBToI420(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
-int ARGBToJ420(const uint8* src_argb,
+int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yj,
+ uint8_t* dst_yj,
int dst_stride_yj,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB to J422.
LIBYUV_API
-int ARGBToJ422(const uint8* src_argb,
+int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yj,
+ uint8_t* dst_yj,
int dst_stride_yj,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB to J400. (JPeg full range).
LIBYUV_API
-int ARGBToJ400(const uint8* src_argb,
+int ARGBToJ400(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yj,
+ uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// Convert ARGB to I400.
LIBYUV_API
-int ARGBToI400(const uint8* src_argb,
+int ARGBToI400(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert ARGB to G. (Reverse of J400ToARGB, which replicates G back to ARGB)
LIBYUV_API
-int ARGBToG(const uint8* src_argb,
+int ARGBToG(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_g,
+ uint8_t* dst_g,
int dst_stride_g,
int width,
int height);
// Convert ARGB To NV12.
LIBYUV_API
-int ARGBToNV12(const uint8* src_argb,
+int ARGBToNV12(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert ARGB To NV21.
LIBYUV_API
-int ARGBToNV21(const uint8* src_argb,
+int ARGBToNV21(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_vu,
+ uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert ARGB To YUY2.
LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb,
+int ARGBToYUY2(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yuy2,
+ uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height);
// Convert ARGB To UYVY.
LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb,
+int ARGBToUYVY(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_uyvy,
+ uint8_t* dst_uyvy,
int dst_stride_uyvy,
int width,
int height);
diff --git a/chromium/third_party/libyuv/include/libyuv/cpu_id.h b/chromium/third_party/libyuv/include/libyuv/cpu_id.h
index 14f735f57b2..91480c68b01 100644
--- a/chromium/third_party/libyuv/include/libyuv/cpu_id.h
+++ b/chromium/third_party/libyuv/include/libyuv/cpu_id.h
@@ -84,7 +84,7 @@ int MaskCpuFlags(int enable_flags);
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
LIBYUV_API
-void CpuId(int eax, int ecx, int* cpu_info);
+void CpuId(int info_eax, int info_ecx, int* cpu_info);
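A small sketch of querying leaf 0 and leaf 1, assuming results are stored in the conventional eax, ebx, ecx, edx order:

int cpu_info[4];        // eax, ebx, ecx, edx (assumed order)
CpuId(0, 0, cpu_info);  // leaf 0: highest supported leaf, vendor id
CpuId(1, 0, cpu_info);  // leaf 1: feature bits in ecx/edx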
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libyuv/include/libyuv/macros_msa.h b/chromium/third_party/libyuv/include/libyuv/macros_msa.h
index 61be352e3af..921eb0714d6 100644
--- a/chromium/third_party/libyuv/include/libyuv/macros_msa.h
+++ b/chromium/third_party/libyuv/include/libyuv/macros_msa.h
@@ -16,38 +16,38 @@
#include <stdint.h>
#if (__mips_isa_rev >= 6)
-#define LW(psrc) \
- ({ \
- uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \
- uint32 val_m; \
- asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
- : [val_m] "=r"(val_m) \
- : [psrc_lw_m] "m"(*psrc_lw_m)); \
- val_m; \
+#define LW(psrc) \
+ ({ \
+ uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
+ uint32_t val_m; \
+ asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_lw_m] "m"(*psrc_lw_m)); \
+ val_m; \
})
#if (__mips == 64)
-#define LD(psrc) \
- ({ \
- uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
- uint64 val_m = 0; \
- asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
- : [val_m] "=r"(val_m) \
- : [psrc_ld_m] "m"(*psrc_ld_m)); \
- val_m; \
+#define LD(psrc) \
+ ({ \
+ uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ uint64_t val_m = 0; \
+ asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_ld_m] "m"(*psrc_ld_m)); \
+ val_m; \
})
#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
- uint32 val0_m, val1_m; \
- uint64 val_m = 0; \
- val0_m = LW(psrc_ld_m); \
- val1_m = LW(psrc_ld_m + 4); \
- val_m = (uint64)(val1_m); /* NOLINT */ \
- val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
- val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
- val_m; \
+#define LD(psrc) \
+ ({ \
+ uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m = 0; \
+ val0_m = LW(psrc_ld_m); \
+ val1_m = LW(psrc_ld_m + 4); \
+ val_m = (uint64_t)(val1_m); /* NOLINT */ \
+ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
+ val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
+ val_m; \
})
#endif // (__mips == 64)
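For reference, a portable C++ sketch of what the 32-bit LD() fallback computes, combining two word loads into one little-endian 64-bit value (illustration only; the real macro uses lw inline assembly, <stdint.h> is already included above, and memcpy needs <string.h>):

static uint64_t LoadDoubleword(const uint8_t* psrc) {
  uint32_t val0, val1;
  memcpy(&val0, psrc, 4);     /* val0_m = LW(psrc) */
  memcpy(&val1, psrc + 4, 4); /* val1_m = LW(psrc + 4) */
  return ((uint64_t)val1 << 32) | val0; /* same merge as the macro */
}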
@@ -81,38 +81,38 @@
})
#endif // !(__mips == 64)
#else // !(__mips_isa_rev >= 6)
-#define LW(psrc) \
- ({ \
- uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \
- uint32 val_m; \
- asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
- : [val_m] "=r"(val_m) \
- : [psrc_lw_m] "m"(*psrc_lw_m)); \
- val_m; \
+#define LW(psrc) \
+ ({ \
+ uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
+ uint32_t val_m; \
+ asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_lw_m] "m"(*psrc_lw_m)); \
+ val_m; \
})
#if (__mips == 64)
-#define LD(psrc) \
- ({ \
- uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
- uint64 val_m = 0; \
- asm volatile("uld %[val_m], %[psrc_ld_m] \n" \
- : [val_m] "=r"(val_m) \
- : [psrc_ld_m] "m"(*psrc_ld_m)); \
- val_m; \
+#define LD(psrc) \
+ ({ \
+ uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ uint64_t val_m = 0; \
+ asm volatile("uld %[val_m], %[psrc_ld_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_ld_m] "m"(*psrc_ld_m)); \
+ val_m; \
})
#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
- uint32 val0_m, val1_m; \
- uint64 val_m = 0; \
- val0_m = LW(psrc_ld_m); \
- val1_m = LW(psrc_ld_m + 4); \
- val_m = (uint64)(val1_m); /* NOLINT */ \
- val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
- val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
- val_m; \
+#define LD(psrc) \
+ ({ \
+ uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m = 0; \
+ val0_m = LW(psrc_ld_m); \
+ val1_m = LW(psrc_ld_m + 4); \
+ val_m = (uint64_t)(val1_m); /* NOLINT */ \
+ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
+ val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
+ val_m; \
})
#endif // (__mips == 64)
diff --git a/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h
index 8a4f282205b..6c12633387f 100644
--- a/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h
+++ b/chromium/third_party/libyuv/include/libyuv/mjpeg_decoder.h
@@ -26,13 +26,13 @@ namespace libyuv {
extern "C" {
#endif
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
+LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size);
#ifdef __cplusplus
} // extern "C"
#endif
-static const uint32 kUnknownDataSize = 0xFFFFFFFF;
+static const uint32_t kUnknownDataSize = 0xFFFFFFFF;
enum JpegSubsamplingType {
kJpegYuv420,
@@ -43,7 +43,7 @@ enum JpegSubsamplingType {
};
struct Buffer {
- const uint8* data;
+ const uint8_t* data;
int len;
};
@@ -65,7 +65,7 @@ struct SetJmpErrorMgr;
class LIBYUV_API MJpegDecoder {
public:
typedef void (*CallbackFunction)(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows);
@@ -85,7 +85,7 @@ class LIBYUV_API MJpegDecoder {
// If return value is LIBYUV_TRUE, then the values for all the following
// getters are populated.
// src_len is the size of the compressed mjpeg frame in bytes.
- LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
+ LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len);
// Returns width of the last loaded frame in pixels.
int GetWidth();
@@ -138,7 +138,7 @@ class LIBYUV_API MJpegDecoder {
// at least GetComponentSize(i). The pointers in planes are incremented
// to point to after the end of the written data.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
- LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
+ LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height);
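A minimal decode sketch; GetHeight(), GetComponentSize() and UnloadFrame() are assumed from the rest of this class, and the plane buffers belong to the caller:

MJpegDecoder decoder;
if (decoder.LoadFrame(jpeg_data, jpeg_size)) {
  int w = decoder.GetWidth();   // valid after a successful LoadFrame
  int h = decoder.GetHeight();  // assumed getter, analogous to GetWidth
  uint8_t* planes[3] = {y_buf, u_buf, v_buf};  // each >= GetComponentSize(i)
  decoder.DecodeToBuffers(planes, w, h);       // note: advances the pointers
  decoder.UnloadFrame();        // assumed cleanup before reuse
}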
// Decodes the entire image and passes the data via repeated calls to a
// callback function. Each call will get the data for a whole number of
@@ -162,14 +162,14 @@ class LIBYUV_API MJpegDecoder {
LIBYUV_BOOL StartDecode();
LIBYUV_BOOL FinishDecode();
- void SetScanlinePointers(uint8** data);
+ void SetScanlinePointers(uint8_t** data);
LIBYUV_BOOL DecodeImcuRow();
int GetComponentScanlinePadding(int component);
// A buffer holding the input data for a frame.
- Buffer buf_;
- BufferVector buf_vec_;
+ Buffer buf_{};
+ BufferVector buf_vec_{};
jpeg_decompress_struct* decompress_struct_;
jpeg_source_mgr* source_mgr_;
@@ -181,12 +181,12 @@ class LIBYUV_API MJpegDecoder {
// Temporaries used to point to scanline outputs.
int num_outbufs_; // Outermost size of all arrays below.
- uint8*** scanlines_;
- int* scanlines_sizes_;
+ uint8_t*** scanlines_{};
+ int* scanlines_sizes_{};
// Temporary buffer used for decoding when we can't decode directly to the
// output buffers. Large enough for just one iMCU row.
- uint8** databuf_;
- int* databuf_strides_;
+ uint8_t** databuf_{};
+ int* databuf_strides_{};
};
} // namespace libyuv
diff --git a/chromium/third_party/libyuv/include/libyuv/planar_functions.h b/chromium/third_party/libyuv/include/libyuv/planar_functions.h
index 653b0619710..324bb1ed0ea 100644
--- a/chromium/third_party/libyuv/include/libyuv/planar_functions.h
+++ b/chromium/third_party/libyuv/include/libyuv/planar_functions.h
@@ -22,36 +22,54 @@ namespace libyuv {
extern "C" {
#endif
+// TODO(fbarchard): Move cpu macros to row.h
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
+#define LIBYUV_DISABLE_X86
+#endif
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
+#endif
+#endif
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_ARGBAFFINEROW_SSE2
+#endif
+
// Copy a plane of data.
LIBYUV_API
-void CopyPlane(const uint8* src_y,
+void CopyPlane(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
LIBYUV_API
-void CopyPlane_16(const uint16* src_y,
+void CopyPlane_16(const uint16_t* src_y,
int src_stride_y,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
int width,
int height);
LIBYUV_API
-void Convert16To8Plane(const uint16* src_y,
+void Convert16To8Plane(const uint16_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int scale, // 16384 for 10 bits
int width,
int height);
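A sketch of narrowing 10-bit samples to 8 bits with the scale noted above; 16384 is 65536/4, so for 10-bit input the conversion effectively computes src >> 2 (buffer names are illustrative):

// 10-bit (0..1023) -> 8-bit (0..255) with scale 16384.
Convert16To8Plane(src10, src10_stride, dst8, dst8_stride,
                  16384,  // 16384 for 10 bits, per the comment above
                  width, height);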
LIBYUV_API
-void Convert8To16Plane(const uint8* src_y,
+void Convert8To16Plane(const uint8_t* src_y,
int src_stride_y,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
int scale, // 1024 for 10 bits
int width,
@@ -59,65 +77,65 @@ void Convert8To16Plane(const uint8* src_y,
// Set a plane of data to a 32 bit value.
LIBYUV_API
-void SetPlane(uint8* dst_y,
+void SetPlane(uint8_t* dst_y,
int dst_stride_y,
int width,
int height,
- uint32 value);
+ uint32_t value);
// Split interleaved UV plane into separate U and V planes.
LIBYUV_API
-void SplitUVPlane(const uint8* src_uv,
+void SplitUVPlane(const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Merge separate U and V planes into one interleaved UV plane.
LIBYUV_API
-void MergeUVPlane(const uint8* src_u,
+void MergeUVPlane(const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
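As a usage sketch, de-interleaving an NV12 chroma plane into I420 U and V planes; chroma dimensions are half the luma size, rounded up (names are illustrative):

int halfwidth = (width + 1) / 2;
int halfheight = (height + 1) / 2;
// NV12 interleaved UV -> separate U and V (the Y plane is copied as-is).
SplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u,
             dst_v, dst_stride_v, halfwidth, halfheight);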
// Split interleaved RGB plane into separate R, G and B planes.
LIBYUV_API
-void SplitRGBPlane(const uint8* src_rgb,
+void SplitRGBPlane(const uint8_t* src_rgb,
int src_stride_rgb,
- uint8* dst_r,
+ uint8_t* dst_r,
int dst_stride_r,
- uint8* dst_g,
+ uint8_t* dst_g,
int dst_stride_g,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
// Merge separate R, G and B planes into one interleaved RGB plane.
LIBYUV_API
-void MergeRGBPlane(const uint8* src_r,
+void MergeRGBPlane(const uint8_t* src_r,
int src_stride_r,
- const uint8* src_g,
+ const uint8_t* src_g,
int src_stride_g,
- const uint8* src_b,
+ const uint8_t* src_b,
int src_stride_b,
- uint8* dst_rgb,
+ uint8_t* dst_rgb,
int dst_stride_rgb,
int width,
int height);
// Copy I400. Supports inverting.
LIBYUV_API
-int I400ToI400(const uint8* src_y,
+int I400ToI400(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
@@ -127,17 +145,17 @@ int I400ToI400(const uint8* src_y,
// Copy I422 to I422.
#define I422ToI422 I422Copy
LIBYUV_API
-int I422Copy(const uint8* src_y,
+int I422Copy(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
@@ -145,84 +163,84 @@ int I422Copy(const uint8* src_y,
// Copy I444 to I444.
#define I444ToI444 I444Copy
LIBYUV_API
-int I444Copy(const uint8* src_y,
+int I444Copy(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert YUY2 to I422.
LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2,
+int YUY2ToI422(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert UYVY to I422.
LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy,
+int UYVYToI422(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
LIBYUV_API
-int YUY2ToNV12(const uint8* src_yuy2,
+int YUY2ToNV12(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
LIBYUV_API
-int UYVYToNV12(const uint8* src_uyvy,
+int UYVYToNV12(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
LIBYUV_API
-int YUY2ToY(const uint8* src_yuy2,
+int YUY2ToY(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API
-int I420ToI400(const uint8* src_y,
+int I420ToI400(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
@@ -233,17 +251,17 @@ int I420ToI400(const uint8* src_y,
// I420 mirror.
LIBYUV_API
-int I420Mirror(const uint8* src_y,
+int I420Mirror(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
@@ -254,9 +272,9 @@ int I420Mirror(const uint8* src_y,
// I400 mirror. A single plane is mirrored horizontally.
// Pass negative height to achieve 180 degree rotation.
LIBYUV_API
-int I400Mirror(const uint8* src_y,
+int I400Mirror(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
@@ -266,20 +284,20 @@ int I400Mirror(const uint8* src_y,
// ARGB mirror.
LIBYUV_API
-int ARGBMirror(const uint8* src_argb,
+int ARGBMirror(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert NV12 to RGB565.
LIBYUV_API
-int NV12ToRGB565(const uint8* src_y,
+int NV12ToRGB565(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
@@ -287,39 +305,39 @@ int NV12ToRGB565(const uint8* src_y,
// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
-int I422ToBGRA(const uint8* src_y,
+int I422ToBGRA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_bgra,
+ uint8_t* dst_bgra,
int dst_stride_bgra,
int width,
int height);
// Convert I422 to ABGR.
LIBYUV_API
-int I422ToABGR(const uint8* src_y,
+int I422ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I422 to RGBA.
LIBYUV_API
-int I422ToRGBA(const uint8* src_y,
+int I422ToRGBA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height);
@@ -328,20 +346,20 @@ int I422ToRGBA(const uint8* src_y,
#define RGB24ToRAW RAWToRGB24
LIBYUV_API
-int RAWToRGB24(const uint8* src_raw,
+int RAWToRGB24(const uint8_t* src_raw,
int src_stride_raw,
- uint8* dst_rgb24,
+ uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Draw a rectangle into I420.
LIBYUV_API
-int I420Rect(uint8* dst_y,
+int I420Rect(uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int x,
int y,
@@ -353,38 +371,38 @@ int I420Rect(uint8* dst_y,
// Draw a rectangle into ARGB.
LIBYUV_API
-int ARGBRect(uint8* dst_argb,
+int ARGBRect(uint8_t* dst_argb,
int dst_stride_argb,
- int x,
- int y,
+ int dst_x,
+ int dst_y,
int width,
int height,
- uint32 value);
+ uint32_t value);
// Convert ARGB to gray scale ARGB.
LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb,
+int ARGBGrayTo(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Make a rectangle of ARGB gray scale.
LIBYUV_API
-int ARGBGray(uint8* dst_argb,
+int ARGBGray(uint8_t* dst_argb,
int dst_stride_argb,
- int x,
- int y,
+ int dst_x,
+ int dst_y,
int width,
int height);
// Make a rectangle of ARGB Sepia tone.
LIBYUV_API
-int ARGBSepia(uint8* dst_argb,
+int ARGBSepia(uint8_t* dst_argb,
int dst_stride_argb,
- int x,
- int y,
+ int dst_x,
+ int dst_y,
int width,
int height);
@@ -395,11 +413,11 @@ int ARGBSepia(uint8* dst_argb,
// The next 4 coefficients apply to B, G, R, A and produce R of the output.
// The last 4 coefficients apply to B, G, R, A and produce A of the output.
LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb,
+int ARGBColorMatrix(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- const int8* matrix_argb,
+ const int8_t* matrix_argb,
int width,
int height);
@@ -410,33 +428,33 @@ int ARGBColorMatrix(const uint8* src_argb,
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb,
+int RGBColorMatrix(uint8_t* dst_argb,
int dst_stride_argb,
- const int8* matrix_rgb,
- int x,
- int y,
+ const int8_t* matrix_rgb,
+ int dst_x,
+ int dst_y,
int width,
int height);
// Apply a color table to each ARGB pixel.
// Table contains 256 ARGB values.
LIBYUV_API
-int ARGBColorTable(uint8* dst_argb,
+int ARGBColorTable(uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* table_argb,
- int x,
- int y,
+ const uint8_t* table_argb,
+ int dst_x,
+ int dst_y,
int width,
int height);
// Apply a color table to each ARGB pixel but preserve destination alpha.
// Table contains 256 ARGB values.
LIBYUV_API
-int RGBColorTable(uint8* dst_argb,
+int RGBColorTable(uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* table_argb,
- int x,
- int y,
+ const uint8_t* table_argb,
+ int dst_x,
+ int dst_y,
int width,
int height);
@@ -444,11 +462,11 @@ int RGBColorTable(uint8* dst_argb,
// Table contains 32768 values indexed by [Y][C] where Y is 7 bit luma from
// RGB (YJ style) and C is an 8 bit color component (R, G or B).
LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb,
+int ARGBLumaColorTable(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* luma_rgb_table,
+ const uint8_t* luma,
int width,
int height);
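A sketch of filling the 32768-entry table described above; this one is the identity mapping, so every luma row passes the color component through unchanged (purely illustrative):

uint8_t luma[128 * 256];  // 128 luma rows x 256 component values
for (int y = 0; y < 128; ++y) {
  for (int c = 0; c < 256; ++c) {
    luma[y * 256 + c] = (uint8_t)c;  // identity: component unchanged
  }
}
ARGBLumaColorTable(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                   luma, width, height);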
@@ -461,9 +479,9 @@ int ARGBLumaColorTable(const uint8* src_argb,
// A polynomial approximation can be derived using software such as 'R'.
LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb,
+int ARGBPolynomial(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const float* poly,
int width,
@@ -472,9 +490,9 @@ int ARGBPolynomial(const uint8* src_argb,
// Convert plane of 16 bit shorts to half floats.
// Source values are multiplied by scale before storing as half float.
LIBYUV_API
-int HalfFloatPlane(const uint16* src_y,
+int HalfFloatPlane(const uint16_t* src_y,
int src_stride_y,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
float scale,
int width,
@@ -485,55 +503,55 @@ int HalfFloatPlane(const uint16* src_y,
// interval_size should be a value between 1 and 255.
// interval_offset should be a value between 0 and 255.
LIBYUV_API
-int ARGBQuantize(uint8* dst_argb,
+int ARGBQuantize(uint8_t* dst_argb,
int dst_stride_argb,
int scale,
int interval_size,
int interval_offset,
- int x,
- int y,
+ int dst_x,
+ int dst_y,
int width,
int height);
// Copy ARGB to ARGB.
LIBYUV_API
-int ARGBCopy(const uint8* src_argb,
+int ARGBCopy(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb,
+int ARGBCopyAlpha(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Extract the alpha channel from ARGB.
LIBYUV_API
-int ARGBExtractAlpha(const uint8* src_argb,
+int ARGBExtractAlpha(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
int width,
int height);
// Copy Y channel to Alpha of ARGB.
LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y,
+int ARGBCopyYToAlpha(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
-typedef void (*ARGBBlendRow)(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+typedef void (*ARGBBlendRow)(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
// Get function to Alpha Blend ARGB pixels and store to destination.
@@ -544,11 +562,11 @@ ARGBBlendRow GetARGBBlend();
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
-int ARGBBlend(const uint8* src_argb0,
+int ARGBBlend(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
@@ -556,13 +574,13 @@ int ARGBBlend(const uint8* src_argb0,
// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
-int BlendPlane(const uint8* src_y0,
+int BlendPlane(const uint8_t* src_y0,
int src_stride_y0,
- const uint8* src_y1,
+ const uint8_t* src_y1,
int src_stride_y1,
- const uint8* alpha,
+ const uint8_t* alpha,
int alpha_stride,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
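Conceptually this computes a per-pixel weighted average; the sketch below states the assumed math (the exact rounding, and that alpha weights src_y0, are assumptions, not taken from this header):

// Assumed per-pixel model: dst = (src0 * alpha + src1 * (255 - alpha)) / 255
BlendPlane(fg_y, fg_stride_y,      // src_y0, weighted by alpha (assumed)
           bg_y, bg_stride_y,      // src_y1, weighted by 255 - alpha
           alpha_plane, alpha_stride,
           dst_y, dst_stride_y, width, height);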
@@ -571,102 +589,102 @@ int BlendPlane(const uint8* src_y0,
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
-int I420Blend(const uint8* src_y0,
+int I420Blend(const uint8_t* src_y0,
int src_stride_y0,
- const uint8* src_u0,
+ const uint8_t* src_u0,
int src_stride_u0,
- const uint8* src_v0,
+ const uint8_t* src_v0,
int src_stride_v0,
- const uint8* src_y1,
+ const uint8_t* src_y1,
int src_stride_y1,
- const uint8* src_u1,
+ const uint8_t* src_u1,
int src_stride_u1,
- const uint8* src_v1,
+ const uint8_t* src_v1,
int src_stride_v1,
- const uint8* alpha,
+ const uint8_t* alpha,
int alpha_stride,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0,
+int ARGBMultiply(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Add ARGB image with ARGB image. Saturates to 255.
LIBYUV_API
-int ARGBAdd(const uint8* src_argb0,
+int ARGBAdd(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0,
+int ARGBSubtract(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I422 to YUY2.
LIBYUV_API
-int I422ToYUY2(const uint8* src_y,
+int I422ToYUY2(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
int width,
int height);
// Convert I422 to UYVY.
LIBYUV_API
-int I422ToUYVY(const uint8* src_y,
+int I422ToUYVY(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_frame,
- int dst_stride_frame,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
int width,
int height);
// Convert unattenuated ARGB to preattenuated ARGB.
LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb,
+int ARGBAttenuate(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert preattenuated ARGB to unattenuated ARGB.
LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb,
+int ARGBUnattenuate(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
@@ -675,9 +693,9 @@ int ARGBUnattenuate(const uint8* src_argb,
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb,
+int ARGBComputeCumulativeSum(const uint8_t* src_argb,
int src_stride_argb,
- int32* dst_cumsum,
+ int32_t* dst_cumsum,
int dst_stride32_cumsum,
int width,
int height);
@@ -689,11 +707,11 @@ int ARGBComputeCumulativeSum(const uint8* src_argb,
// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
// Blur is optimized for radius of 5 (11x11) or less.
LIBYUV_API
-int ARGBBlur(const uint8* src_argb,
+int ARGBBlur(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- int32* dst_cumsum,
+ int32_t* dst_cumsum,
int dst_stride32_cumsum,
int width,
int height,
@@ -701,24 +719,24 @@ int ARGBBlur(const uint8* src_argb,
// Multiply ARGB image by ARGB value.
LIBYUV_API
-int ARGBShade(const uint8* src_argb,
+int ARGBShade(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
- uint32 value);
+ uint32_t value);
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
-int InterpolatePlane(const uint8* src0,
+int InterpolatePlane(const uint8_t* src0,
int src_stride0,
- const uint8* src1,
+ const uint8_t* src1,
int src_stride1,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height,
@@ -727,11 +745,11 @@ int InterpolatePlane(const uint8* src0,
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0,
+int ARGBInterpolate(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
@@ -741,93 +759,78 @@ int ARGBInterpolate(const uint8* src_argb0,
// Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
-int I420Interpolate(const uint8* src0_y,
+int I420Interpolate(const uint8_t* src0_y,
int src0_stride_y,
- const uint8* src0_u,
+ const uint8_t* src0_u,
int src0_stride_u,
- const uint8* src0_v,
+ const uint8_t* src0_v,
int src0_stride_v,
- const uint8* src1_y,
+ const uint8_t* src1_y,
int src1_stride_y,
- const uint8* src1_u,
+ const uint8_t* src1_u,
int src1_stride_u,
- const uint8* src1_v,
+ const uint8_t* src1_v,
int src1_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
int interpolation);
-#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
-#define LIBYUV_DISABLE_X86
-#endif
-// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
-#define LIBYUV_DISABLE_X86
-#endif
-#endif
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_ARGBAFFINEROW_SSE2
-#endif
-
// Row function for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb,
+void ARGBAffineRow_C(const uint8_t* src_argb,
int src_argb_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
const float* uv_dudv,
int width);
+// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h
LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb,
+void ARGBAffineRow_SSE2(const uint8_t* src_argb,
int src_argb_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
const float* uv_dudv,
int width);
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned.
LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra,
+int ARGBShuffle(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* shuffler,
+ const uint8_t* shuffler,
int width,
int height);
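A call sketch with a shuffler that reverses the bytes of each 4-byte pixel, i.e. the BGRA-to-ARGB swap mentioned above; the mask values are an assumption matching that byte reversal:

// Byte indices per output byte; alignas keeps the required 16-byte alignment.
alignas(16) static const uint8_t kShuffleBGRAToARGB[16] = {
    3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12};
ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
            kShuffleBGRAToARGB, width, height);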
// Sobel ARGB effect with planar output.
LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb,
+int ARGBSobelToPlane(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Sobel ARGB effect.
LIBYUV_API
-int ARGBSobel(const uint8* src_argb,
+int ARGBSobel(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb,
+int ARGBSobelXY(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
diff --git a/chromium/third_party/libyuv/include/libyuv/rotate.h b/chromium/third_party/libyuv/include/libyuv/rotate.h
index b9f7154a51d..76b692be8b0 100644
--- a/chromium/third_party/libyuv/include/libyuv/rotate.h
+++ b/chromium/third_party/libyuv/include/libyuv/rotate.h
@@ -33,79 +33,79 @@ typedef enum RotationMode {
// Rotate I420 frame.
LIBYUV_API
-int I420Rotate(const uint8* src_y,
+int I420Rotate(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
- int src_width,
- int src_height,
+ int width,
+ int height,
enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y,
+int NV12ToI420Rotate(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
- int src_width,
- int src_height,
+ int width,
+ int height,
enum RotationMode mode);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
-int RotatePlane(const uint8* src,
+int RotatePlane(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
- int src_width,
- int src_height,
+ int width,
+ int height,
enum RotationMode mode);
// Rotate planes by 90, 180, 270. Deprecated.
LIBYUV_API
-void RotatePlane90(const uint8* src,
+void RotatePlane90(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height);
LIBYUV_API
-void RotatePlane180(const uint8* src,
+void RotatePlane180(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height);
LIBYUV_API
-void RotatePlane270(const uint8* src,
+void RotatePlane270(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height);
LIBYUV_API
-void RotateUV90(const uint8* src,
+void RotateUV90(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
@@ -115,21 +115,21 @@ void RotateUV90(const uint8* src,
// split the data into two buffers while
// rotating them. Deprecated.
LIBYUV_API
-void RotateUV180(const uint8* src,
+void RotateUV180(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
LIBYUV_API
-void RotateUV270(const uint8* src,
+void RotateUV270(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
@@ -139,19 +139,19 @@ void RotateUV270(const uint8* src,
// order will result in a rotation by +- 90 degrees.
// Deprecated.
LIBYUV_API
-void TransposePlane(const uint8* src,
+void TransposePlane(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height);
LIBYUV_API
-void TransposeUV(const uint8* src,
+void TransposeUV(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
diff --git a/chromium/third_party/libyuv/include/libyuv/rotate_argb.h b/chromium/third_party/libyuv/include/libyuv/rotate_argb.h
index be0190c1787..20432949ab4 100644
--- a/chromium/third_party/libyuv/include/libyuv/rotate_argb.h
+++ b/chromium/third_party/libyuv/include/libyuv/rotate_argb.h
@@ -21,9 +21,9 @@ extern "C" {
// Rotate ARGB frame
LIBYUV_API
-int ARGBRotate(const uint8* src_argb,
+int ARGBRotate(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int src_width,
int src_height,
diff --git a/chromium/third_party/libyuv/include/libyuv/rotate_row.h b/chromium/third_party/libyuv/include/libyuv/rotate_row.h
index 7e9dfd2cf4e..5edc0fcf13a 100644
--- a/chromium/third_party/libyuv/include/libyuv/rotate_row.h
+++ b/chromium/third_party/libyuv/include/libyuv/rotate_row.h
@@ -18,10 +18,14 @@ namespace libyuv {
extern "C" {
#endif
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
@@ -34,21 +38,18 @@ extern "C" {
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
-// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__i386__) || \
- (defined(__x86_64__) && !defined(__native_client__)))
+// The following are available for GCC 32 or 64 bit:
+#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
#endif
-// The following are available for 64 bit GCC but not NaCL:
-#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
- defined(__x86_64__)
+// The following are available for 64 bit GCC:
+#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
#define HAS_TRANSPOSEWX8_FAST_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSEWX8_NEON
#define HAS_TRANSPOSEUVWX8_NEON
@@ -59,129 +60,129 @@ extern "C" {
#define HAS_TRANSPOSEUVWX16_MSA
#endif
-void TransposeWxH_C(const uint8* src,
+void TransposeWxH_C(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height);
-void TransposeWx8_C(const uint8* src,
+void TransposeWx8_C(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx16_C(const uint8* src,
+void TransposeWx16_C(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx8_NEON(const uint8* src,
+void TransposeWx8_NEON(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx8_SSSE3(const uint8* src,
+void TransposeWx8_SSSE3(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx8_Fast_SSSE3(const uint8* src,
+void TransposeWx8_Fast_SSSE3(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx16_MSA(const uint8* src,
+void TransposeWx16_MSA(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx8_Any_NEON(const uint8* src,
+void TransposeWx8_Any_NEON(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx8_Any_SSSE3(const uint8* src,
+void TransposeWx8_Any_SSSE3(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx8_Fast_Any_SSSE3(const uint8* src,
+void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeWx16_Any_MSA(const uint8* src,
+void TransposeWx16_Any_MSA(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width);
-void TransposeUVWxH_C(const uint8* src,
+void TransposeUVWxH_C(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
-void TransposeUVWx8_C(const uint8* src,
+void TransposeUVWx8_C(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx16_C(const uint8* src,
+void TransposeUVWx16_C(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx8_SSE2(const uint8* src,
+void TransposeUVWx8_SSE2(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx8_NEON(const uint8* src,
+void TransposeUVWx8_NEON(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx16_MSA(const uint8* src,
+void TransposeUVWx16_MSA(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx8_Any_SSE2(const uint8* src,
+void TransposeUVWx8_Any_SSE2(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx8_Any_NEON(const uint8* src,
+void TransposeUVWx8_Any_NEON(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx16_Any_MSA(const uint8* src,
+void TransposeUVWx16_Any_MSA(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width);
diff --git a/chromium/third_party/libyuv/include/libyuv/row.h b/chromium/third_party/libyuv/include/libyuv/row.h
index 992d2ceb5d5..62ed119db7b 100644
--- a/chromium/third_party/libyuv/include/libyuv/row.h
+++ b/chromium/third_party/libyuv/include/libyuv/row.h
@@ -20,29 +20,20 @@ namespace libyuv {
extern "C" {
#endif
-#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
-
-#define align_buffer_64(var, size) \
- uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \
- uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
-
-#define free_aligned_buffer_64(var) \
- free(var##_mem); \
- var = 0
-
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
-// define LIBYUV_DISABLE_X86
+#define LIBYUV_DISABLE_X86
#endif
#endif
-#if defined(__native_client__)
-#define LIBYUV_DISABLE_NEON
-#endif
// clang >= 3.5.0 required for Arm64.
#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
#if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
@@ -178,7 +169,6 @@ extern "C" {
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
-// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
defined(GCC_HAS_AVX2))
@@ -262,11 +252,14 @@ extern "C" {
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+#define HAS_ABGRTOAR30ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
// I210 is for H010. 2 = 422. I for 601 vs H for 709.
+#define HAS_I210TOAR30ROW_SSSE3
#define HAS_I210TOARGBROW_SSSE3
+#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGERGBROW_SSSE3
#define HAS_SPLITRGBROW_SSSE3
#endif
@@ -276,9 +269,15 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+#define HAS_ABGRTOAR30ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
+#define HAS_I210TOARGBROW_AVX2
+#define HAS_I210TOAR30ROW_AVX2
+#define HAS_I422TOAR30ROW_AVX2
+#define HAS_I422TOUYVYROW_AVX2
+#define HAS_I422TOYUY2ROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MULTIPLYROW_16_AVX2
#endif
@@ -463,18 +462,18 @@ extern "C" {
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#endif
-typedef __declspec(align(16)) int16 vec16[8];
-typedef __declspec(align(16)) int32 vec32[4];
-typedef __declspec(align(16)) int8 vec8[16];
-typedef __declspec(align(16)) uint16 uvec16[8];
-typedef __declspec(align(16)) uint32 uvec32[4];
-typedef __declspec(align(16)) uint8 uvec8[16];
-typedef __declspec(align(32)) int16 lvec16[16];
-typedef __declspec(align(32)) int32 lvec32[8];
-typedef __declspec(align(32)) int8 lvec8[32];
-typedef __declspec(align(32)) uint16 ulvec16[16];
-typedef __declspec(align(32)) uint32 ulvec32[8];
-typedef __declspec(align(32)) uint8 ulvec8[32];
+typedef __declspec(align(16)) int16_t vec16[8];
+typedef __declspec(align(16)) int32_t vec32[4];
+typedef __declspec(align(16)) int8_t vec8[16];
+typedef __declspec(align(16)) uint16_t uvec16[8];
+typedef __declspec(align(16)) uint32_t uvec32[4];
+typedef __declspec(align(16)) uint8_t uvec8[16];
+typedef __declspec(align(32)) int16_t lvec16[16];
+typedef __declspec(align(32)) int32_t lvec32[8];
+typedef __declspec(align(32)) int8_t lvec8[32];
+typedef __declspec(align(32)) uint16_t ulvec16[16];
+typedef __declspec(align(32)) uint32_t ulvec32[8];
+typedef __declspec(align(32)) uint8_t ulvec8[32];
#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
// Caveat: GCC 4.2 to 4.7 have a known issue using vectors with const.
#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
@@ -482,32 +481,32 @@ typedef __declspec(align(32)) uint8 ulvec8[32];
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
-typedef int16 __attribute__((vector_size(16))) vec16;
-typedef int32 __attribute__((vector_size(16))) vec32;
-typedef int8 __attribute__((vector_size(16))) vec8;
-typedef uint16 __attribute__((vector_size(16))) uvec16;
-typedef uint32 __attribute__((vector_size(16))) uvec32;
-typedef uint8 __attribute__((vector_size(16))) uvec8;
-typedef int16 __attribute__((vector_size(32))) lvec16;
-typedef int32 __attribute__((vector_size(32))) lvec32;
-typedef int8 __attribute__((vector_size(32))) lvec8;
-typedef uint16 __attribute__((vector_size(32))) ulvec16;
-typedef uint32 __attribute__((vector_size(32))) ulvec32;
-typedef uint8 __attribute__((vector_size(32))) ulvec8;
+typedef int16_t __attribute__((vector_size(16))) vec16;
+typedef int32_t __attribute__((vector_size(16))) vec32;
+typedef int8_t __attribute__((vector_size(16))) vec8;
+typedef uint16_t __attribute__((vector_size(16))) uvec16;
+typedef uint32_t __attribute__((vector_size(16))) uvec32;
+typedef uint8_t __attribute__((vector_size(16))) uvec8;
+typedef int16_t __attribute__((vector_size(32))) lvec16;
+typedef int32_t __attribute__((vector_size(32))) lvec32;
+typedef int8_t __attribute__((vector_size(32))) lvec8;
+typedef uint16_t __attribute__((vector_size(32))) ulvec16;
+typedef uint32_t __attribute__((vector_size(32))) ulvec32;
+typedef uint8_t __attribute__((vector_size(32))) ulvec8;
#else
#define SIMD_ALIGNED(var) var
-typedef int16 vec16[8];
-typedef int32 vec32[4];
-typedef int8 vec8[16];
-typedef uint16 uvec16[8];
-typedef uint32 uvec32[4];
-typedef uint8 uvec8[16];
-typedef int16 lvec16[16];
-typedef int32 lvec32[8];
-typedef int8 lvec8[32];
-typedef uint16 ulvec16[16];
-typedef uint32 ulvec32[8];
-typedef uint8 ulvec8[32];
+typedef int16_t vec16[8];
+typedef int32_t vec32[4];
+typedef int8_t vec8[16];
+typedef uint16_t uvec16[8];
+typedef uint32_t uvec32[4];
+typedef uint8_t uvec8[16];
+typedef int16_t lvec16[16];
+typedef int32_t lvec32[8];
+typedef int8_t lvec8[32];
+typedef uint16_t ulvec16[16];
+typedef uint32_t ulvec32[8];
+typedef uint8_t ulvec8[32];
#endif
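// Illustrative sketch only (not part of the upstream header): SIMD_ALIGNED
// wraps the declarator, so an aligned lookup table or scratch row can be
// declared as follows (hypothetical names and values):
//
//   static const uvec8 kShuffleMask = {0u, 4u, 8u, 12u, 1u, 5u, 9u, 13u,
//                                      2u, 6u, 10u, 14u, 3u, 7u, 11u, 15u};
//   static uint8_t SIMD_ALIGNED(temp_row[256]);  /* 16/32-byte aligned */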
#if defined(__aarch64__)
@@ -531,13 +530,13 @@ struct YuvConstants {
#else
// This struct is for Intel color conversion.
struct YuvConstants {
- int8 kUVToB[32];
- int8 kUVToG[32];
- int8 kUVToR[32];
- int16 kUVBiasB[16];
- int16 kUVBiasG[16];
- int16 kUVBiasR[16];
- int16 kYToRgb[16];
+ int8_t kUVToB[32];
+ int8_t kUVToG[32];
+ int8_t kUVToR[32];
+ int16_t kUVBiasB[16];
+ int16_t kUVBiasG[16];
+ int16_t kUVBiasR[16];
+ int16_t kYToRgb[16];
};
// Offsets into YuvConstants structure
@@ -560,6 +559,16 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601
extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPEG
extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
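// Illustrative sketch only: each row kernel takes one of these tables per
// call to select the conversion matrix.  Converting a single 64-pixel I422
// row to ARGB with the BT.601 matrix (kYuvI601Constants is declared earlier
// in this header; buffer names are hypothetical) might look like:
//
//   I422ToARGBRow_NEON(src_y, src_u, src_v, dst_argb,
//                      &kYuvI601Constants, 64);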
+#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
+
+#define align_buffer_64(var, size) \
+ uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
+ uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
+
+#define free_aligned_buffer_64(var) \
+ free(var##_mem); \
+ var = 0
+
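// Illustrative sketch only: align_buffer_64 over-allocates by 63 bytes and
// rounds the pointer up to a 64-byte boundary, so free_aligned_buffer_64
// must be passed the same name in order to free the underlying var##_mem
// allocation (names below are hypothetical):
//
//   align_buffer_64(row, kRowBytes);   /* declares row and row_mem */
//   assert(IS_ALIGNED(row, 64));
//   /* ... use row as temporary storage ... */
//   free_aligned_buffer_64(row);       /* frees row_mem, zeroes row */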
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
#else
@@ -572,62 +581,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
#else
#define LABELALIGN
#endif
-#if defined(__native_client__) && defined(__x86_64__)
-// r14 is used for MEMOP macros.
-#define NACL_R14 "r14",
-#define BUNDLELOCK ".bundle_lock\n"
-#define BUNDLEUNLOCK ".bundle_unlock\n"
-#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
-#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
-#define MEMLEA(offset, base) #offset "(%q" #base ")"
-#define MEMLEA3(offset, index, scale) #offset "(,%q" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
- #offset "(%q" #base ",%q" #index "," #scale ")"
-#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
-#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
- " (%%r15,%%r14),%%" #reg "\n" BUNDLEUNLOCK
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
- " %%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
- " (%%r15,%%r14),%" #arg "\n" BUNDLEUNLOCK
-#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \
- " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" BUNDLEUNLOCK
-#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #op \
- " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK
-#else // defined(__native_client__) && defined(__x86_64__)
-#define NACL_R14
-#define BUNDLEALIGN
-#define MEMACCESS(base) "(%" #base ")"
-#define MEMACCESS2(offset, base) #offset "(%" #base ")"
-#define MEMLEA(offset, base) #offset "(%" #base ")"
-#define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
- #offset "(%" #base ",%" #index "," #scale ")"
-#define MEMMOVESTRING(s, d)
-#define MEMSTORESTRING(reg, d)
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
- #opcode " %%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
-#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 \
- ",%%" #reg2 "\n"
-#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
- #op " $" #sel ",%%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
-#endif // defined(__native_client__) && defined(__x86_64__)
// Intel Code Analyzer markers. Insert IACA_START IACA_END around code to be
// measured and then run with iaca -64 libyuv_unittest.
@@ -680,2452 +633,2701 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
IACA_UD_BYTES \
}
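// Illustrative sketch only, per the comment above: bracket the code under
// measurement with the markers, then run IACA over the built test binary:
//
//   IACA_START
//   /* row kernel loop under measurement */
//   IACA_END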
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_NEON(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGBARow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
+void NV12ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGBARow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_MSA(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_MSA(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
+void NV12ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_MSA(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_MSA(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
-void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
-void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYRow_MSA(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_MSA(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToUV444Row_NEON(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_NEON(const uint8* src_argb,
+void ARGBToUVRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUV444Row_MSA(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_MSA(const uint8* src_argb,
+void ARGBToUVRow_MSA(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_NEON(const uint8* src_argb,
+void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void BGRAToUVRow_NEON(const uint8* src_bgra,
+void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ABGRToUVRow_NEON(const uint8* src_abgr,
+void ABGRToUVRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGBAToUVRow_NEON(const uint8* src_rgba,
+void RGBAToUVRow_NEON(const uint8_t* src_rgba,
int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB24ToUVRow_NEON(const uint8* src_rgb24,
+void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RAWToUVRow_NEON(const uint8* src_raw,
+void RAWToUVRow_NEON(const uint8_t* src_raw,
int src_stride_raw,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB565ToUVRow_NEON(const uint8* src_rgb565,
+void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555,
+void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
+void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
int src_stride_argb4444,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_MSA(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void BGRAToUVRow_MSA(const uint8* src_bgra,
- int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ABGRToUVRow_MSA(const uint8* src_abgr,
- int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGBAToUVRow_MSA(const uint8* src_rgba,
- int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB24ToUVRow_MSA(const uint8* src_rgb24,
- int src_stride_rgb24,
- uint8* dst_u,
- uint8* dst_v,
+void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RAWToUVRow_MSA(const uint8* src_raw,
- int src_stride_raw,
- uint8* dst_u,
- uint8* dst_v,
+void RAWToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB565ToUVRow_MSA(const uint8* src_rgb565,
+void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGB1555ToUVRow_MSA(const uint8* src_argb1555,
+void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width);
+void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_y,
int width);
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width);
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
-void BGRAToYRow_MSA(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToYRow_MSA(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_MSA(const uint8* src_rgba, uint8* dst_y, int width);
-void RGB24ToYRow_MSA(const uint8* src_rgb24, uint8* dst_y, int width);
-void RAWToYRow_MSA(const uint8* src_raw, uint8* dst_y, int width);
-void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width);
-void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width);
-void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
-void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width);
-void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width);
-void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width);
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width);
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width);
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width);
-void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
-void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
-void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
-void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
-void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
-void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width);
-void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
-void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width);
-void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
-void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555,
- uint8* dst_y,
+void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444,
- uint8* dst_y,
+void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void BGRAToYRow_Any_MSA(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToYRow_Any_MSA(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_Any_MSA(const uint8* src_rgba, uint8* dst_y, int width);
-void ARGBToYJRow_Any_MSA(const uint8* src_argb, uint8* dst_y, int width);
-void ARGBToYRow_Any_MSA(const uint8* src_argb, uint8* dst_y, int width);
-void RGB24ToYRow_Any_MSA(const uint8* src_rgb24, uint8* dst_y, int width);
-void RAWToYRow_Any_MSA(const uint8* src_raw, uint8* dst_y, int width);
-void RGB565ToYRow_Any_MSA(const uint8* src_rgb565, uint8* dst_y, int width);
-void ARGB1555ToYRow_Any_MSA(const uint8* src_argb1555, uint8* dst_y, int width);
-
-void ARGBToUVRow_AVX2(const uint8* src_argb,
+void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_AVX2(const uint8* src_argb,
+void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_SSSE3(const uint8* src_argb,
+void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_SSSE3(const uint8* src_argb,
+void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void BGRAToUVRow_SSSE3(const uint8* src_bgra,
+void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ABGRToUVRow_SSSE3(const uint8* src_abgr,
+void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGBAToUVRow_SSSE3(const uint8* src_rgba,
+void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_Any_AVX2(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_Any_AVX2(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_Any_SSSE3(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra,
- int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr,
- int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba,
- int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUV444Row_Any_NEON(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_Any_NEON(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUV444Row_Any_MSA(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void ARGBToUVRow_Any_MSA(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_Any_NEON(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void BGRAToUVRow_Any_NEON(const uint8* src_bgra,
- int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ABGRToUVRow_Any_NEON(const uint8* src_abgr,
- int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGBAToUVRow_Any_NEON(const uint8* src_rgba,
- int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24,
- int src_stride_rgb24,
- uint8* dst_u,
- uint8* dst_v,
+void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RAWToUVRow_Any_NEON(const uint8* src_raw,
- int src_stride_raw,
- uint8* dst_u,
- uint8* dst_v,
+void RAWToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565,
- int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
- int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
- int src_stride_argb4444,
- uint8* dst_u,
- uint8* dst_v,
+void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_Any_MSA(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void BGRAToUVRow_Any_MSA(const uint8* src_bgra,
- int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+void BGRAToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ABGRToUVRow_Any_MSA(const uint8* src_abgr,
- int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGBAToUVRow_Any_MSA(const uint8* src_rgba,
- int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB24ToUVRow_Any_MSA(const uint8* src_rgb24,
- int src_stride_rgb24,
- uint8* dst_u,
- uint8* dst_v,
+void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RAWToUVRow_Any_MSA(const uint8* src_raw,
- int src_stride_raw,
- uint8* dst_u,
- uint8* dst_v,
+void RAWToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB565ToUVRow_Any_MSA(const uint8* src_rgb565,
- int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGB1555ToUVRow_Any_MSA(const uint8* src_argb1555,
- int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_C(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_C(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVRow_C(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUVJRow_C(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void BGRAToUVRow_C(const uint8* src_bgra,
- int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+void BGRAToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ABGRToUVRow_C(const uint8* src_abgr,
- int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+void ABGRToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGBAToUVRow_C(const uint8* src_rgba,
- int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+void RGBAToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB24ToUVRow_C(const uint8* src_rgb24,
- int src_stride_rgb24,
- uint8* dst_u,
- uint8* dst_v,
+void RGB24ToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RAWToUVRow_C(const uint8* src_raw,
- int src_stride_raw,
- uint8* dst_u,
- uint8* dst_v,
+void RAWToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void RGB565ToUVRow_C(const uint8* src_rgb565,
+void RGB565ToUVRow_C(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGB1555ToUVRow_C(const uint8* src_argb1555,
+void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGB4444ToUVRow_C(const uint8* src_argb4444,
+void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
int src_stride_argb4444,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUV444Row_SSSE3(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void ARGBToUV444Row_C(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_C(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void MirrorRow_MSA(const uint8* src, uint8* dst, int width);
-void MirrorRow_C(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width);
-
-void MirrorUVRow_SSSE3(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void MirrorUVRow_SSSE3(const uint8_t* src,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void MirrorUVRow_NEON(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void MirrorUVRow_MSA(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_Any_MSA(const uint8* src, uint8* dst, int width);
-
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
-void SplitUVRow_SSE2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void SplitUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_SSE2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void SplitUVRow_AVX2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_AVX2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void SplitUVRow_NEON(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void SplitUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
-void SplitUVRow_Any_SSE2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void SplitUVRow_Any_AVX2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void SplitUVRow_Any_NEON(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void SplitUVRow_Any_MSA(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void MergeUVRow_C(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_C(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width);
-void MergeUVRow_SSE2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_SSE2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width);
-void MergeUVRow_AVX2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_AVX2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width);
-void MergeUVRow_NEON(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_NEON(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width);
-void MergeUVRow_MSA(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_MSA(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width);
-void MergeUVRow_Any_SSE2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void MergeUVRow_Any_AVX2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void MergeUVRow_Any_NEON(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void MergeUVRow_Any_MSA(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
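// Illustrative sketch only: SplitUVRow_*/MergeUVRow_* convert between the
// interleaved UV plane of NV12/NV21 and planar U/V.  For one chroma row of
// chroma_width samples (hypothetical buffer names):
//
//   SplitUVRow_C(src_uv, dst_u, dst_v, chroma_width);   /* NV12 -> I420 */
//   MergeUVRow_C(src_u, src_v, dst_uv, chroma_width);   /* I420 -> NV12 */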
-void SplitRGBRow_C(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_C(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width);
-void SplitRGBRow_SSSE3(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width);
-void SplitRGBRow_NEON(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_NEON(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width);
-void SplitRGBRow_Any_SSSE3(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width);
-void SplitRGBRow_Any_NEON(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width);
-void MergeRGBRow_C(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_C(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width);
-void MergeRGBRow_SSSE3(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_SSSE3(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width);
-void MergeRGBRow_NEON(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width);
-void MergeRGBRow_Any_SSSE3(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
int width);
-void MergeRGBRow_Any_NEON(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_Any_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width);
-void MergeUVRow_16_C(const uint16* src_u,
- const uint16* src_v,
- uint16* dst_uv,
+void MergeUVRow_16_C(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
int scale, /* 64 for 10 bit */
int width);
-void MergeUVRow_16_AVX2(const uint16* src_u,
- const uint16* src_v,
- uint16* dst_uv,
+void MergeUVRow_16_AVX2(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
int scale,
int width);
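// Illustrative sketch only: the scale factor multiplies each sample while
// merging, so 64 (= 2^6, per the comment above) shifts 10-bit samples up to
// the most significant bits of the 16-bit output, as P010-style buffers
// expect (buffer names are hypothetical):
//
//   MergeUVRow_16_C(src_u, src_v, dst_uv, 64, chroma_width);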
-void MultiplyRow_16_AVX2(const uint16* src_y,
- uint16* dst_y,
+void MultiplyRow_16_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
int scale,
int width);
-void MultiplyRow_16_C(const uint16* src_y, uint16* dst_y, int scale, int width);
+void MultiplyRow_16_C(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
-void Convert8To16Row_C(const uint8* src_y, uint16* dst_y, int scale, int width);
-void Convert8To16Row_SSE2(const uint8* src_y,
- uint16* dst_y,
+void Convert8To16Row_C(const uint8_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void Convert8To16Row_SSE2(const uint8_t* src_y,
+ uint16_t* dst_y,
int scale,
int width);
-void Convert8To16Row_AVX2(const uint8* src_y,
- uint16* dst_y,
+void Convert8To16Row_AVX2(const uint8_t* src_y,
+ uint16_t* dst_y,
int scale,
int width);
-void Convert8To16Row_Any_SSE2(const uint8* src_y,
- uint16* dst_y,
+void Convert8To16Row_Any_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
int scale,
int width);
-void Convert8To16Row_Any_AVX2(const uint8* src_y,
- uint16* dst_y,
+void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
int scale,
int width);
-void Convert16To8Row_C(const uint16* src_y, uint8* dst_y, int scale, int width);
-void Convert16To8Row_SSSE3(const uint16* src_y,
- uint8* dst_y,
+void Convert16To8Row_C(const uint16_t* src_y,
+ uint8_t* dst_y,
+ int scale,
+ int width);
+void Convert16To8Row_SSSE3(const uint16_t* src_y,
+ uint8_t* dst_y,
int scale,
int width);
-void Convert16To8Row_AVX2(const uint16* src_y,
- uint8* dst_y,
+void Convert16To8Row_AVX2(const uint16_t* src_y,
+ uint8_t* dst_y,
int scale,
int width);
-void Convert16To8Row_Any_SSSE3(const uint16* src_y,
- uint8* dst_y,
+void Convert16To8Row_Any_SSSE3(const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
int scale,
int width);
-void Convert16To8Row_Any_AVX2(const uint16* src_y,
- uint8* dst_y,
+void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
int scale,
int width);
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_AVX(const uint8* src, uint8* dst, int count);
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
-void CopyRow_NEON(const uint8* src, uint8* dst, int count);
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
-void CopyRow_C(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
-
-void CopyRow_16_C(const uint16* src, uint16* dst, int count);
-
-void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
+
+void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_MSA(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb,
- uint8* dst_a,
+void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
+void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBExtractAlphaRow_Any_AVX2(const uint8* src_argb,
- uint8* dst_a,
+void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb,
- uint8* dst_a,
+void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBExtractAlphaRow_Any_MSA(const uint8* src_argb,
- uint8* dst_a,
+void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y,
- uint8* dst_argb,
+void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y,
- uint8* dst_argb,
+void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void SetRow_C(uint8* dst, uint8 v8, int count);
-void SetRow_MSA(uint8* dst, uint8 v8, int count);
-void SetRow_X86(uint8* dst, uint8 v8, int count);
-void SetRow_ERMS(uint8* dst, uint8 v8, int count);
-void SetRow_NEON(uint8* dst, uint8 v8, int count);
-void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
-void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
-
-void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_Any_MSA(uint8* dst_argb, uint32 v32, int count);
+void SetRow_C(uint8_t* dst, uint8_t v8, int width);
+void SetRow_MSA(uint8_t* dst, uint8_t v8, int width);
+void SetRow_X86(uint8_t* dst, uint8_t v8, int width);
+void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width);
+void SetRow_NEON(uint8_t* dst, uint8_t v8, int width);
+void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width);
+void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width);
+
+void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width);
+void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width);
+void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width);
// ARGBShufflers for BGRAToARGB etc.
-void ARGBShuffleRow_C(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width);
-void ARGBShuffleRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width);
-void ARGBShuffleRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width);
-void ARGBShuffleRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width);
-void ARGBShuffleRow_MSA(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width);
-void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
int width);
-void ARGBShuffleRow_Any_AVX2(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
int width);
-void ARGBShuffleRow_Any_NEON(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
int width);
-void ARGBShuffleRow_Any_MSA(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
- int width);
-
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555,
- uint8* dst_argb,
- int width);
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444,
- uint8* dst_argb,
- int width);
-void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555,
- uint8* dst_argb,
- int width);
-void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444,
- uint8* dst_argb,
- int width);
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width);
-void RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
- uint8* dst_argb,
- int width);
-void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
+ int width);
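For orientation, the shuffler argument above is a 16-byte reorder table applied to each 16-byte group, i.e. to four ARGB pixels at a time; a BGRA-to-ARGB swap is the byte pattern 3,2,1,0 repeated per pixel. A small sketch against the portable kernel; the table is written out here for illustration rather than taken from the library:

#include <stdint.h>
#include "libyuv/row.h" /* ARGBShuffleRow_C */

/* Reverse the byte order of every 4-byte pixel: BGRA bytes -> ARGB bytes. */
static const uint8_t kShuffleBGRAToARGB[16] = {3,  2,  1, 0, 7,  6,  5,  4,
                                               11, 10, 9, 8, 15, 14, 13, 12};

void SwapBGRAToARGB(const uint8_t* src, uint8_t* dst, int width) {
  ARGBShuffleRow_C(src, dst, kShuffleBGRAToARGB, width);
}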
+
+void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width);
+void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width);
+void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width);
+
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width);
+void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width);
+void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width);
+void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width);
+void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
int width);
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width);
-void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width);
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width);
-void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24,
- uint8* dst_argb,
+void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
+void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width);
+void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width);
+void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width);
+void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
+void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
-void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565,
- uint8* dst_argb,
+void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565,
- uint8* dst_argb,
+void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24,
- uint8* dst_argb,
+void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void RGB24ToARGBRow_Any_MSA(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToARGBRow_Any_MSA(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RAWToRGB24Row_Any_MSA(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565,
- uint8* dst_argb,
+void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void RGB565ToARGBRow_Any_MSA(const uint8* src_rgb565,
- uint8* dst_argb,
+void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB1555ToARGBRow_Any_MSA(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToAR30Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
-
-void ARGBToRGB565DitherRow_C(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+
+void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width);
-void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
int width);
-void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
int width);
-void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToAR30Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
+
+void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width);
+void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb565,
+ int width);
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb1555,
+ int width);
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb4444,
+ int width);
+void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width);
-void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565DitherRow_MSA(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width);
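The dither4 argument packs four dither offsets, one byte per pixel, applied cyclically to each group of four pixels before the 8-bit channels are truncated to 5/6/5 bits. A hedged sketch of building it on a little-endian target (the byte-to-pixel order follows from the kernel reading dither4 through a byte pointer; the 0/2 values are an illustrative pattern, not a library constant):

#include <stdint.h>
#include "libyuv/row.h" /* ARGBToRGB565DitherRow_C */

/* Pack four per-pixel dither offsets; byte 0 is applied to pixel 0,
   byte 1 to pixel 1, and so on, repeating every four pixels. */
static inline uint32_t PackDither4(uint8_t d0, uint8_t d1, uint8_t d2,
                                   uint8_t d3) {
  return (uint32_t)d0 | ((uint32_t)d1 << 8) | ((uint32_t)d2 << 16) |
         ((uint32_t)d3 << 24);
}

void ToRGB565Dithered(const uint8_t* argb, uint8_t* rgb565, int width) {
  ARGBToRGB565DitherRow_C(argb, rgb565, PackDither4(0, 2, 0, 2), width);
}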
-void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-
-void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_Any_MSA(const uint8* src_y, uint8* dst_argb, int width);
-
-void I444ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
+void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
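AR30, new in this revision, is a 2:10:10:10 packed format with blue in the low bits. A sketch of the packing rule assumed by these declarations; the 8-to-10-bit widening replicates the top two bits so 0 maps to 0 and 255 to 1023 (the row kernels may simply force the 2-bit alpha opaque):

#include <stdint.h>

/* Pack one 8-bit ARGB pixel into AR30: bits 0-9 B, 10-19 G,
   20-29 R, 30-31 A. Illustrative helper, not a library function. */
static inline uint32_t PackAR30(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
  uint32_t b10 = ((uint32_t)b << 2) | (b >> 6);
  uint32_t g10 = ((uint32_t)g << 2) | (g >> 6);
  uint32_t r10 = ((uint32_t)r << 2) | (r >> 6);
  return b10 | (g10 << 10) | (r10 << 20) | ((uint32_t)(a >> 6) << 30);
}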
+
+void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
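J400 is full-range greyscale, so its row kernel just replicates each luma byte into B, G and R and writes an opaque alpha. A portable equivalent, written out for reference:

#include <stdint.h>

/* Reference behaviour of J400ToARGBRow_C: grey -> opaque grey ARGB. */
void J400ToARGBRowRef(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  for (int x = 0; x < width; ++x) {
    uint8_t y = src_y[x];
    dst_argb[0] = y;   /* B */
    dst_argb[1] = y;   /* G */
    dst_argb[2] = y;   /* R */
    dst_argb[3] = 255; /* A */
    dst_argb += 4;
  }
}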
+
+void I444ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToAR30Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I210ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void I210ToARGBRow_C(const uint16* src_y,
- const uint16* src_u,
- const uint16* src_v,
- uint8* dst_argb,
+void I210ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422AlphaToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV21ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_C(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_C(const uint8_t* src_uyvy,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGBARow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB1555Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
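All of the YUV-to-RGB kernels above take a YuvConstants pointer selecting the conversion matrix (BT.601, JPEG full range, BT.709, and so on) in fixed point. For orientation only, the underlying studio-range BT.601 math in floating point; the fixed-point kernels will differ in final rounding:

#include <stdint.h>

static inline uint8_t Clamp255f(float v) {
  return (uint8_t)(v < 0.f ? 0.f : (v > 255.f ? 255.f : v));
}

/* One-pixel BT.601 studio-range YUV -> RGB reference. */
void YuvPixelRef(uint8_t y, uint8_t u, uint8_t v,
                 uint8_t* b, uint8_t* g, uint8_t* r) {
  float yf = 1.164f * (y - 16);
  *b = Clamp255f(yf + 2.018f * (u - 128));
  *g = Clamp255f(yf - 0.391f * (u - 128) - 0.813f * (v - 128));
  *r = Clamp255f(yf + 1.596f * (v - 128));
}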
-void I422ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGBARow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I210ToARGBRow_SSSE3(const uint16* src_y,
- const uint16* src_u,
- const uint16* src_v,
- uint8* dst_argb,
+void I422ToAR30Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422ToAR30Row_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV21ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGBARow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToAR30Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I210ToARGBRow_Any_SSSE3(const uint16* src_y,
- const uint16* src_u,
- const uint16* src_v,
- uint8* dst_argb,
+void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_Any_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_MSA(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width);
+void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
+void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
+void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// ARGB preattenuated alpha blend.
-void ARGBBlendRow_SSSE3(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBBlendRow_NEON(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBBlendRow_MSA(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBBlendRow_C(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
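"Preattenuated" here means src_argb0 already carries color multiplied by its alpha, so the per-channel rule is the premultiplied "over" operator: foreground plus background scaled by one minus alpha. A scalar sketch of that rule; the 256-based shift is the usual integer approximation, and the kernels' exact rounding is not claimed:

#include <stdint.h>

static inline uint8_t Clamp255u(int v) { return (uint8_t)(v > 255 ? 255 : v); }

/* Premultiplied over: dst = fg + bg * (1 - a_fg), per channel. */
static inline uint8_t BlendChannel(uint8_t fg, uint8_t bg, uint8_t a) {
  return Clamp255u(fg + (((256 - a) * bg) >> 8));
}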
// Unattenuated planar alpha blend.
-void BlendPlaneRow_SSSE3(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+void BlendPlaneRow_SSSE3(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width);
-void BlendPlaneRow_Any_SSSE3(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+void BlendPlaneRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
int width);
-void BlendPlaneRow_AVX2(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+void BlendPlaneRow_AVX2(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width);
-void BlendPlaneRow_Any_AVX2(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
- int width);
-void BlendPlaneRow_C(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void BlendPlaneRow_C(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width);
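BlendPlane, by contrast, blends two unattenuated single planes under an explicit alpha plane: a straight lerp, dst = (a * src0 + (255 - a) * src1) / 255. A scalar sketch; the +255 rounding bias matches the common shift-by-8 formulation but is an assumption here:

#include <stdint.h>

/* Unattenuated lerp of two plane samples under 8-bit alpha. */
static inline uint8_t BlendPlanePixel(uint8_t s0, uint8_t s1, uint8_t a) {
  return (uint8_t)((a * s0 + (255 - a) * s1 + 255) >> 8);
}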
// ARGB multiply images. Same API as Blend, but these require
// pointer and width alignment for SSE2.
-void ARGBMultiplyRow_C(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBMultiplyRow_SSE2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBMultiplyRow_AVX2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBMultiplyRow_NEON(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBMultiplyRow_Any_NEON(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBMultiplyRow_MSA(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBMultiplyRow_Any_MSA(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
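Multiply modulates one image by another per channel, renormalizing the product back to 8 bits, dst ≈ src0 * src1 / 255. A scalar sketch using the classic repeat-and-shift approximation of the divide by 255:

#include <stdint.h>

/* dst ~= (s0 * s1) / 255: widen s0 to 16 bits by repetition,
   multiply, then take the top byte of the 24-bit product. */
static inline uint8_t MulChannel(uint8_t s0, uint8_t s1) {
  uint32_t f = (uint32_t)s0 | ((uint32_t)s0 << 8);
  return (uint8_t)(((uint32_t)s1 * f) >> 16);
}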
// ARGB add images.
-void ARGBAddRow_C(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBAddRow_SSE2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBAddRow_Any_SSE2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBAddRow_AVX2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBAddRow_Any_AVX2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBAddRow_NEON(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBAddRow_Any_NEON(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBAddRow_MSA(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBAddRow_Any_MSA(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
// ARGB subtract images. Same API as Blend, but these require
// pointer and width alignment for SSE2.
-void ARGBSubtractRow_C(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBSubtractRow_SSE2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBSubtractRow_Any_SSE2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBSubtractRow_AVX2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBSubtractRow_Any_AVX2(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBSubtractRow_NEON(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBSubtractRow_Any_NEON(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void ARGBSubtractRow_MSA(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width);
-void ARGBSubtractRow_Any_MSA(const uint8* src_argb,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
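Add and Subtract above are plain per-channel saturating arithmetic. A compact scalar reference for both:

#include <stdint.h>

static inline uint8_t AddSat(uint8_t a, uint8_t b) {
  int v = a + b;
  return (uint8_t)(v > 255 ? 255 : v);
}

static inline uint8_t SubSat(uint8_t a, uint8_t b) {
  int v = a - b;
  return (uint8_t)(v < 0 ? 0 : v);
}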
-void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToAR30Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
+void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
-void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
int width);
-void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
int width);
-void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToAR30Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ABGRToAR30Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToAR30Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
-void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToRGB24Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRAWRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToRGB565Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToARGB4444Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
int width);
-void ARGBToRGB24Row_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_Any_MSA(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToARGB4444Row_Any_MSA(const uint8* src_argb,
- uint8* dst_rgb,
+void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBToRGB565DitherRow_Any_MSA(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
int width);
-void I444ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- const uint8* src_a,
- uint8* dst_argb,
+void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGBARow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGB24Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGB4444Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGB1555Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBRow_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGBARow_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422AlphaToARGBRow_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- const uint8* src_a,
- uint8* dst_argb,
+void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB24Row_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToRGB565Row_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB4444Row_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void I422ToARGB1555Row_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_Any_MSA(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToRGB565Row_Any_MSA(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_Any_MSA(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToARGBRow_Any_MSA(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void UYVYToARGBRow_Any_MSA(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
+void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
+void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_NEON(const uint8* src_yuy2,
+void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_MSA(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_MSA(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_C(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_C(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToYRow_Any_MSA(const uint8* src_yuy2, uint8* dst_y, int width);
-void YUY2ToUVRow_Any_MSA(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void YUY2ToUV422Row_Any_MSA(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy,
+void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_SSE2(const uint8* src_uyvy,
+void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy,
+void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_NEON(const uint8* src_uyvy,
+void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_NEON(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_MSA(const uint8* src_uyvy,
- int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_MSA(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_C(const uint8* src_uyvy,
- int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_C(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_C(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_C(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy,
- int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy,
- int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_Any_NEON(const uint8* src_uyvy,
- int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToYRow_Any_MSA(const uint8* src_uyvy, uint8* dst_y, int width);
-void UYVYToUVRow_Any_MSA(const uint8* src_uyvy,
- int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void UYVYToUV422Row_Any_MSA(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width);
-void I422ToYUY2Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
int width);
-void I422ToUYVYRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToUYVYRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
int width);
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
+void I422ToUYVYRow_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width);
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToYUY2Row_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToUYVYRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToYUY2Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
+void I422ToUYVYRow_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width);
-void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
- int width);
-void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
- int width);
-void I422ToYUY2Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToUYVYRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToYUY2Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
int width);
-void I422ToUYVYRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToUYVYRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width);
-void I422ToYUY2Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
- int width);
-void I422ToUYVYRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
- int width);
-void I422ToYUY2Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToYUY2Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
int width);
-void I422ToUYVYRow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToUYVYRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width);
-void I422ToYUY2Row_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
int width);
-void I422ToUYVYRow_Any_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
 int width);

 // Effects related row functions.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBAttenuateRow_Any_NEON(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBAttenuateRow_Any_MSA(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
 int width);

 // Inverse table for unattenuate, shared by C and SSE2.
-extern const uint32 fixed_invtbl8[256];
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+extern const uint32_t fixed_invtbl8[256];
+void ARGBUnattenuateRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
-void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBUnattenuateRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
int width);
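
For reference, a minimal scalar sketch of the attenuate/unattenuate math these rows implement (a hedged sketch, not the library's exact code: the real rows are vectorized, and fixed_invtbl8 lets the unattenuate path replace the division below with a fixed-point multiply by a precomputed reciprocal of alpha):

#include <stdint.h>

// Sketch: premultiply each color channel by alpha (attenuate).
static void AttenuatePixelSketch(const uint8_t* src, uint8_t* dst) {
  uint32_t a = src[3];
  dst[0] = (uint8_t)(src[0] * a / 255);  // B
  dst[1] = (uint8_t)(src[1] * a / 255);  // G
  dst[2] = (uint8_t)(src[2] * a / 255);  // R
  dst[3] = (uint8_t)a;
}

// Sketch: divide alpha back out, clamping to 255; alpha 0 passes through.
static void UnattenuatePixelSketch(const uint8_t* src, uint8_t* dst) {
  uint32_t a = src[3];
  for (int c = 0; c < 3; ++c) {
    uint32_t v = a ? src[c] * 255 / a : src[c];
    dst[c] = (uint8_t)(v > 255 ? 255 : v);
  }
  dst[3] = (uint8_t)a;
}
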
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);

-void ARGBSepiaRow_C(uint8* dst_argb, int width);
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
-void ARGBSepiaRow_MSA(uint8* dst_argb, int width);
+void ARGBSepiaRow_C(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width);

-void ARGBColorMatrixRow_C(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width);
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width);
-void ARGBColorMatrixRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width);
-void ARGBColorMatrixRow_MSA(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
 int width);

-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
+void ARGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width);
+void ARGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
 int width);

-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
+void RGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width);
+void RGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
 int width);

-void ARGBQuantizeRow_C(uint8* dst_argb,
+void ARGBQuantizeRow_C(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
int width);
-void ARGBQuantizeRow_SSE2(uint8* dst_argb,
+void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
int width);
-void ARGBQuantizeRow_NEON(uint8* dst_argb,
+void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
int width);
-void ARGBQuantizeRow_MSA(uint8* dst_argb,
+void ARGBQuantizeRow_MSA(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
int width);
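
ARGBQuantizeRow posterizes pixels in place. A scalar sketch, assuming the common convention that scale is roughly 65536 / interval_size so that (v * scale >> 16) yields a bucket index; alpha is left untouched:

#include <stdint.h>

// Sketch: snap each color channel to interval_size-wide buckets.
static void QuantizeRowSketch(uint8_t* dst_argb, int scale, int interval_size,
                              int interval_offset, int width) {
  for (int i = 0; i < width; ++i) {
    for (int c = 0; c < 3; ++c) {  // B, G, R; dst_argb[3] (alpha) unchanged
      dst_argb[c] = (uint8_t)((dst_argb[c] * scale >> 16) * interval_size +
                              interval_offset);
    }
    dst_argb += 4;
  }
}
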
-void ARGBShadeRow_C(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBShadeRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value);
-void ARGBShadeRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+ uint32_t value);
+void ARGBShadeRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value);
-void ARGBShadeRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
+ uint32_t value);
+void ARGBShadeRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value);
-void ARGBShadeRow_MSA(const uint8* src_argb,
- uint8* dst_argb,
+ uint32_t value);
+void ARGBShadeRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value);
+ uint32_t value);
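
ARGBShadeRow scales a row by a packed per-channel factor. A scalar sketch, under the assumption that value carries four 0..255 scale bytes in B,G,R,A order; the SIMD rows' exact rounding may differ:

#include <stdint.h>

// Sketch: multiply each channel by the matching byte of 'value'.
static void ShadeRowSketch(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width, uint32_t value) {
  for (int i = 0; i < width; ++i) {
    for (int c = 0; c < 4; ++c) {
      uint32_t scale = (value >> (8 * c)) & 0xff;
      dst_argb[c] = (uint8_t)(src_argb[c] * scale >> 8);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
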
// Used for blur.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft,
- const int32* botleft,
+void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
+ const int32_t* botleft,
int width,
int area,
- uint8* dst,
+ uint8_t* dst,
int count);
-void ComputeCumulativeSumRow_SSE2(const uint8* row,
- int32* cumsum,
- const int32* previous_cumsum,
+void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
int width);
-void CumulativeSumToAverageRow_C(const int32* topleft,
- const int32* botleft,
- int width,
+void CumulativeSumToAverageRow_C(const int32_t* tl,
+ const int32_t* bl,
+ int w,
int area,
- uint8* dst,
+ uint8_t* dst,
int count);
-void ComputeCumulativeSumRow_C(const uint8* row,
- int32* cumsum,
- const int32* previous_cumsum,
+void ComputeCumulativeSumRow_C(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
int width);
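
The blur helpers form a summed-area table: ComputeCumulativeSumRow accumulates per-channel running totals row by row, and CumulativeSumToAverageRow recovers each box sum from four table reads. A one-channel sketch of the recovery step (the real tables hold four int32 values per pixel, one per ARGB channel):

#include <stdint.h>

// Sketch: with cumulative sums, the total over a w-wide box spanning the
// rows between 'tl' (top-left table row) and 'bl' (bottom-left table row)
// is bl[x+w] - bl[x] - tl[x+w] + tl[x]; dividing by the box area gives
// the blurred sample.
static uint8_t BoxAverageSketch(const int32_t* tl, const int32_t* bl,
                                int x, int w, int area) {
  int32_t sum = bl[x + w] - bl[x] - tl[x + w] + tl[x];
  return (uint8_t)(sum / area);
}
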
LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb,
+void ARGBAffineRow_C(const uint8_t* src_argb,
int src_argb_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
const float* uv_dudv,
int width);
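
A scalar sketch of the affine row, under the assumption that uv_dudv holds {u, v, du, dv}: a starting source coordinate and its per-destination-pixel step, sampled nearest-neighbor:

#include <stdint.h>

// Sketch: walk a line through the source in (u, v) space, copying one
// 4-byte ARGB pixel per destination pixel. No bounds checking here; the
// caller is assumed to keep the line inside the source image.
static void AffineRowSketch(const uint8_t* src_argb, int src_stride,
                            uint8_t* dst_argb, const float* uv_dudv,
                            int width) {
  float u = uv_dudv[0];
  float v = uv_dudv[1];
  for (int i = 0; i < width; ++i) {
    const uint8_t* s = src_argb + (int)v * src_stride + (int)u * 4;
    uint8_t* d = dst_argb + i * 4;
    d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
    u += uv_dudv[2];
    v += uv_dudv[3];
  }
}
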
LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb,
+void ARGBAffineRow_SSE2(const uint8_t* src_argb,
int src_argb_stride,
- uint8* dst_argb,
- const float* uv_dudv,
+ uint8_t* dst_argb,
+ const float* src_dudv,
 int width);

 // Used for I420Scale, ARGBScale, and ARGBInterpolate.
-void InterpolateRow_C(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
+void InterpolateRow_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
int width,
int source_y_fraction);
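
InterpolateRow blends the row at src_ptr with the row one stride below it, weighted by source_y_fraction out of 256. A scalar sketch (the vector paths may round slightly differently):

#include <stddef.h>
#include <stdint.h>

// Sketch: dst = src0 * (256 - f) / 256 + src1 * f / 256, with rounding.
static void InterpolateRowSketch(uint8_t* dst_ptr, const uint8_t* src_ptr,
                                 ptrdiff_t src_stride, int width,
                                 int source_y_fraction) {
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int f1 = source_y_fraction;
  int f0 = 256 - f1;
  for (int i = 0; i < width; ++i) {
    dst_ptr[i] = (uint8_t)((src_ptr[i] * f0 + src_ptr1[i] * f1 + 128) >> 8);
  }
}
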
-void InterpolateRow_SSSE3(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width,
+void InterpolateRow_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
int source_y_fraction);
-void InterpolateRow_AVX2(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width,
+void InterpolateRow_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
int source_y_fraction);
-void InterpolateRow_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width,
+void InterpolateRow_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
int source_y_fraction);
-void InterpolateRow_MSA(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
+void InterpolateRow_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
int width,
int source_y_fraction);
-void InterpolateRow_Any_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
-void InterpolateRow_Any_SSSE3(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
-void InterpolateRow_Any_AVX2(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_Any_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
-void InterpolateRow_Any_MSA(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
-void InterpolateRow_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
- ptrdiff_t src_stride_ptr,
+void InterpolateRow_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
int width,
 int source_y_fraction);

 // Sobel images.
-void SobelXRow_C(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width);
-void SobelXRow_SSE2(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width);
-void SobelXRow_NEON(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width);
-void SobelXRow_MSA(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width);
-void SobelYRow_C(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width);
-void SobelYRow_SSE2(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width);
-void SobelYRow_NEON(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width);
-void SobelYRow_MSA(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width);
-void SobelRow_C(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
-void SobelRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
-void SobelRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
-void SobelRow_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
-void SobelToPlaneRow_C(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width);
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width);
-void SobelToPlaneRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width);
-void SobelToPlaneRow_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width);
-void SobelXYRow_C(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
-void SobelXYRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
-void SobelXYRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
-void SobelXYRow_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width);
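
The Sobel rows split the classic 3x3 kernels into row passes: SobelXRow reads three source rows (columns i and i+2, weighted 1-2-1), SobelYRow reads two rows, and the remaining rows combine the absolute gradients. A sketch of the combining step as SobelRow does it, writing a grey ARGB pixel; SobelXYRow instead appears to pack the x gradient, the sum, and the y gradient into separate channels (a hedged reading of the row names, not a verified kernel listing):

#include <stdint.h>

// Sketch: saturating-add the gradient magnitudes and splat to grey.
static void SobelCombineSketch(const uint8_t* src_sobelx,
                               const uint8_t* src_sobely,
                               uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    int s = src_sobelx[i] + src_sobely[i];
    uint8_t v = (uint8_t)(s > 255 ? 255 : s);
    dst_argb[0] = v;    // B
    dst_argb[1] = v;    // G
    dst_argb[2] = v;    // R
    dst_argb[3] = 255;  // A
    dst_argb += 4;
  }
}
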
-void SobelRow_Any_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelRow_Any_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelRow_Any_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelToPlaneRow_Any_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelXYRow_Any_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelXYRow_Any_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void SobelXYRow_Any_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
 int width);

-void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBPolynomialRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
int width);
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
int width);
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
 int width);

 // Scale and convert to half float.
-void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloatRow_Any_SSE2(const uint16* src,
- uint16* dst,
- float scale,
+void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width);
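
The half-float rows multiply each 16-bit sample by scale and store the result as an IEEE binary16 value (the F16C variants use the hardware conversion). A minimal scalar sketch with a truncating float-to-half helper; it deliberately skips denormal, NaN, and rounding details:

#include <stdint.h>
#include <string.h>

// Sketch: rebias the float exponent (127 -> 15) and truncate the mantissa.
static uint16_t FloatToHalfSketch(float f) {
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));
  uint32_t sign = (bits >> 16) & 0x8000u;
  int32_t exp = (int32_t)((bits >> 23) & 0xff) - 127 + 15;
  uint32_t mant = bits & 0x7fffffu;
  if (exp <= 0) return (uint16_t)sign;               // flush small to +/-0
  if (exp >= 31) return (uint16_t)(sign | 0x7c00u);  // overflow to +/-inf
  return (uint16_t)(sign | ((uint32_t)exp << 10) | (mant >> 13));
}

static void HalfFloatRowSketch(const uint16_t* src, uint16_t* dst,
                               float scale, int width) {
  for (int i = 0; i < width; ++i) {
    dst[i] = FloatToHalfSketch((float)src[i] * scale);
  }
}
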
+void HalfFloatRow_SSE2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_SSE2(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
int width);
-void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloatRow_Any_AVX2(const uint16* src,
- uint16* dst,
- float scale,
+void HalfFloatRow_AVX2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_AVX2(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
int width);
-void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloatRow_Any_F16C(const uint16* src,
- uint16* dst,
+void HalfFloatRow_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_F16C(const uint16_t* src,
+ uint16_t* dst,
float scale,
int width);
-void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloat1Row_Any_F16C(const uint16* src,
- uint16* dst,
+void HalfFloat1Row_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloat1Row_Any_F16C(const uint16_t* src,
+ uint16_t* dst,
float scale,
int width);
-void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloatRow_Any_NEON(const uint16* src,
- uint16* dst,
- float scale,
+void HalfFloatRow_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_NEON(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
int width);
-void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloat1Row_Any_NEON(const uint16* src,
- uint16* dst,
- float scale,
+void HalfFloat1Row_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
int width);
-void HalfFloatRow_MSA(const uint16* src, uint16* dst, float scale, int width);
-void HalfFloatRow_Any_MSA(const uint16* src,
- uint16* dst,
- float scale,
+void HalfFloatRow_MSA(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_MSA(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
 int width);

-void ARGBLumaColorTableRow_C(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- const uint8* luma,
- uint32 lumacoeff);
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* luma,
+ uint32_t lumacoeff);
+void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- const uint8* luma,
- uint32 lumacoeff);
+ const uint8_t* luma,
+ uint32_t lumacoeff);
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width);
float ScaleMaxSamples_NEON(const float* src,
                           float* dst,
                           float scale,
                           int width);
diff --git a/chromium/third_party/libyuv/include/libyuv/scale.h b/chromium/third_party/libyuv/include/libyuv/scale.h
index 6d6b9a8583a..b937d348cab 100644
--- a/chromium/third_party/libyuv/include/libyuv/scale.h
+++ b/chromium/third_party/libyuv/include/libyuv/scale.h
@@ -28,22 +28,22 @@ typedef enum FilterMode {
// Scale a YUV plane.
LIBYUV_API
-void ScalePlane(const uint8* src,
+void ScalePlane(const uint8_t* src,
int src_stride,
int src_width,
int src_height,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int dst_width,
int dst_height,
enum FilterMode filtering);
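
A minimal usage sketch for the updated ScalePlane signature, halving a greyscale plane with bilinear filtering (buffer names and the stride-equals-width layout are illustrative only):

#include <stdint.h>
#include "libyuv/scale.h"

// Sketch: src/dst are caller-owned buffers sized for their planes.
static void HalvePlaneSketch(const uint8_t* src, int src_w, int src_h,
                             uint8_t* dst) {
  ScalePlane(src, src_w, src_w, src_h,
             dst, src_w / 2, src_w / 2, src_h / 2,
             kFilterBilinear);
}
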
LIBYUV_API
-void ScalePlane_16(const uint16* src,
+void ScalePlane_16(const uint16_t* src,
int src_stride,
int src_width,
int src_height,
- uint16* dst,
+ uint16_t* dst,
int dst_stride,
int dst_width,
int dst_height,
@@ -60,38 +60,38 @@ void ScalePlane_16(const uint16* src,
// Returns 0 if successful.
LIBYUV_API
-int I420Scale(const uint8* src_y,
+int I420Scale(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
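
A matching usage sketch for I420Scale: each plane passes its own pointer and stride, and the chroma planes are half the luma dimensions (all names illustrative):

#include <stdint.h>
#include "libyuv/scale.h"

// Sketch: scale an I420 frame from (sw, sh) to (dw, dh); returns 0 on
// success, as documented above.
static int ScaleI420Sketch(const uint8_t* src_y, const uint8_t* src_u,
                           const uint8_t* src_v, int sw, int sh,
                           uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v,
                           int dw, int dh) {
  return I420Scale(src_y, sw, src_u, sw / 2, src_v, sw / 2, sw, sh,
                   dst_y, dw, dst_u, dw / 2, dst_v, dw / 2, dw, dh,
                   kFilterBox);
}
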
LIBYUV_API
-int I420Scale_16(const uint16* src_y,
+int I420Scale_16(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
- uint16* dst_u,
+ uint16_t* dst_u,
int dst_stride_u,
- uint16* dst_v,
+ uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
@@ -100,17 +100,17 @@ int I420Scale_16(const uint16* src_y,
#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
-int Scale(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
+int Scale(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
int src_stride_y,
int src_stride_u,
int src_stride_v,
int src_width,
int src_height,
- uint8* dst_y,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_y,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int dst_stride_y,
int dst_stride_u,
int dst_stride_v,
@@ -118,17 +118,6 @@ int Scale(const uint8* src_y,
int dst_height,
LIBYUV_BOOL interpolate);
-// Legacy API. Deprecated.
-LIBYUV_API
-int ScaleOffset(const uint8* src_i420,
- int src_width,
- int src_height,
- uint8* dst_i420,
- int dst_width,
- int dst_height,
- int dst_yoffset,
- LIBYUV_BOOL interpolate);
-
// For testing, allow disabling of specialized scalers.
LIBYUV_API
void SetUseReferenceImpl(LIBYUV_BOOL use);
diff --git a/chromium/third_party/libyuv/include/libyuv/scale_argb.h b/chromium/third_party/libyuv/include/libyuv/scale_argb.h
index 3d25e579cde..7641f18e341 100644
--- a/chromium/third_party/libyuv/include/libyuv/scale_argb.h
+++ b/chromium/third_party/libyuv/include/libyuv/scale_argb.h
@@ -20,11 +20,11 @@ extern "C" {
#endif
LIBYUV_API
-int ARGBScale(const uint8* src_argb,
+int ARGBScale(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
@@ -32,11 +32,11 @@ int ARGBScale(const uint8* src_argb,
// Clipped scale takes destination rectangle coordinates for clip values.
LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb,
+int ARGBScaleClip(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
@@ -48,18 +48,18 @@ int ARGBScaleClip(const uint8* src_argb,
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
-int YUVToARGBScaleClip(const uint8* src_y,
+int YUVToARGBScaleClip(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint32 src_fourcc,
+ uint32_t src_fourcc,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- uint32 dst_fourcc,
+ uint32_t dst_fourcc,
int dst_width,
int dst_height,
int clip_x,
diff --git a/chromium/third_party/libyuv/include/libyuv/scale_row.h b/chromium/third_party/libyuv/include/libyuv/scale_row.h
index 3db46d399ea..7194ba09f84 100644
--- a/chromium/third_party/libyuv/include/libyuv/scale_row.h
+++ b/chromium/third_party/libyuv/include/libyuv/scale_row.h
@@ -19,17 +19,20 @@ namespace libyuv {
extern "C" {
#endif
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
-
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
@@ -81,7 +84,7 @@ extern "C" {
#endif
// The following are available on Neon platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
@@ -113,8 +116,8 @@ void ScalePlaneVertical(int src_height,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int y,
int dy,
@@ -126,8 +129,8 @@ void ScalePlaneVertical_16(int src_height,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_argb,
- uint16* dst_argb,
+ const uint16_t* src_argb,
+ uint16_t* dst_argb,
int x,
int y,
int dy,
@@ -166,425 +169,431 @@ void ScaleSlope(int src_width,
int* dx,
int* dy);
-void ScaleRowDown2_C(const uint8* src_ptr,
+void ScaleRowDown2_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown2_16_C(const uint16* src_ptr,
+void ScaleRowDown2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
-void ScaleRowDown2Linear_C(const uint8* src_ptr,
+void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
+void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
-void ScaleRowDown2Box_C(const uint8* src_ptr,
+void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
+void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown2Box_16_C(const uint16* src_ptr,
+void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
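
A scalar sketch of the 2x-down row kernels, assuming the library's usual sampling choices: the plain variant point-samples one pixel of each pair, while the Box variant averages a 2x2 block with rounding ('next' is the row at src_ptr + src_stride):

#include <stdint.h>

// Sketch: point-sampled 2x horizontal decimation.
static void Down2Sketch(const uint8_t* src, uint8_t* dst, int dst_width) {
  for (int x = 0; x < dst_width; ++x) {
    dst[x] = src[x * 2 + 1];  // keep one pixel of each pair
  }
}

// Sketch: rounded 2x2 box average.
static void Down2BoxSketch(const uint8_t* src, const uint8_t* next,
                           uint8_t* dst, int dst_width) {
  for (int x = 0; x < dst_width; ++x) {
    dst[x] = (uint8_t)((src[x * 2] + src[x * 2 + 1] +
                        next[x * 2] + next[x * 2 + 1] + 2) >> 2);
  }
}
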
-void ScaleRowDown4_C(const uint8* src_ptr,
+void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown4_16_C(const uint16* src_ptr,
+void ScaleRowDown4_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
-void ScaleRowDown4Box_C(const uint8* src_ptr,
+void ScaleRowDown4Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown4Box_16_C(const uint16* src_ptr,
+void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
-void ScaleRowDown34_C(const uint8* src_ptr,
+void ScaleRowDown34_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown34_16_C(const uint16* src_ptr,
+void ScaleRowDown34_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* d,
+ uint8_t* d,
int dst_width);
-void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* d,
+ uint16_t* d,
int dst_width);
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* d,
+ uint8_t* d,
int dst_width);
-void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* d,
+ uint16_t* d,
int dst_width);
-void ScaleCols_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleCols_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleColsUp2_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleColsUp2_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int,
int);
-void ScaleColsUp2_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleColsUp2_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
int,
int);
-void ScaleFilterCols_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleFilterCols_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleFilterCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
int x,
int dx);
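
The column scalers step through the source in 16.16 fixed point: x is the current source position and dx the per-destination-pixel increment (which is why the 64-bit-safe variants above take x32). A sketch of the filtering variant; the real kernels use a coarser internal blend factor and special-case the final pixel so the xi + 1 read stays in bounds:

#include <stdint.h>

// Sketch: bilinear horizontal resample driven by 16.16 fixed point.
static void FilterColsSketch(uint8_t* dst_ptr, const uint8_t* src_ptr,
                             int dst_width, int x, int dx) {
  for (int j = 0; j < dst_width; ++j) {
    int xi = x >> 16;     // integer source index
    int xf = x & 0xffff;  // fraction between xi and xi + 1
    dst_ptr[j] = (uint8_t)((src_ptr[xi] * (65536 - xf) +
                            src_ptr[xi + 1] * xf + 32768) >> 16);
    x += dx;
  }
}
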
-void ScaleFilterCols64_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols64_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
- int x,
+ int x32,
int dx);
-void ScaleFilterCols64_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
- int x,
+ int x32,
int dx);
-void ScaleRowDown38_C(const uint8* src_ptr,
+void ScaleRowDown38_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown38_16_C(const uint16* src_ptr,
+void ScaleRowDown38_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
+void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst_ptr,
+ uint16_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst_ptr,
+ uint16_t* dst_ptr,
int dst_width);
-void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
-void ScaleARGBRowDown2_C(const uint8* src_argb,
+void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_16_C(const uint16_t* src_ptr,
+ uint32_t* dst_ptr,
+ int src_width);
+void ScaleARGBRowDown2_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDown2Box_C(const uint8* src_argb,
+void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEven_C(const uint8* src_argb,
+void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBCols_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBCols64_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
- int x,
+ int x32,
int dx);
-void ScaleARGBColsUp2_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBColsUp2_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int,
int);
-void ScaleARGBFilterCols_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBFilterCols64_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
- int x,
+ int x32,
int dx);
// Specialized scalers for x86.
-void ScaleRowDown2_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2_AVX2(const uint8* src_ptr,
+void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
+void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
+void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4_SSSE3(const uint8* src_ptr,
+void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4_AVX2(const uint8* src_ptr,
+void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
+void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2_Any_AVX2(const uint8* src_ptr,
+void ScaleRowDown2_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr,
+void ScaleRowDown2Linear_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr,
+void ScaleRowDown2Box_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr,
+void ScaleRowDown2Box_Odd_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown4_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown4Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4_Any_AVX2(const uint8* src_ptr,
+void ScaleRowDown4_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr,
+void ScaleRowDown4Box_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-
-void ScaleFilterCols_SSSE3(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width);
+void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width);
+
+void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleColsUp2_SSE2(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
// ARGB Column functions
-void ScaleARGBCols_SSE2(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBFilterCols_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBCols_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleARGBCols_Any_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleARGBFilterCols_MSA(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBCols_MSA(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx);
-void ScaleARGBFilterCols_Any_MSA(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleARGBCols_Any_MSA(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
// ARGB Row functions
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
@@ -598,225 +607,227 @@ void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2Box_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_argb,
+void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_argb,
+void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_argb,
+void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
+void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb,
+void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEven_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_argb,
+void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
- uint8_t* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleARGBRowDownEvenBox_Any_MSA(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_ptr,
int dst_width);
// ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation.
// Note - not static due to reuse in convert for 444 to 420.
-void ScaleRowDown2_NEON(const uint8* src_ptr,
+void ScaleRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
+void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown2Box_NEON(const uint8* src_ptr,
+void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown4_NEON(const uint8* src_ptr,
+void ScaleRowDown4_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4Box_NEON(const uint8* src_ptr,
+void ScaleRowDown4Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load up every 4th pixel into 4 different registers.
// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
+void ScaleRowDown34_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
+void ScaleRowDown38_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
// 32x3 -> 12x1
-void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown2_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown2Linear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown2Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr,
+void ScaleRowDown2Box_Odd_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown4_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown4Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown34_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
// 32 -> 12
-void ScaleRowDown38_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown38_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
// 32x3 -> 12x1
-void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown38_3_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_Any_NEON(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width);
-void ScaleFilterCols_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleFilterCols_Any_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
@@ -854,47 +865,47 @@ void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
-void ScaleFilterCols_MSA(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleRowDown34_MSA(const uint8* src_ptr,
+void ScaleRowDown34_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst,
int dst_width);
-void ScaleRowDown34_0_Box_MSA(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* d,
int dst_width);
-void ScaleRowDown34_1_Box_MSA(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* d,
int dst_width);
void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst,
+ uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst,
+ uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst,
+ uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst,
+ uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst,
+ uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8_t* dst,
+ uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
@@ -907,22 +918,22 @@ void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr,
void ScaleAddRow_Any_MSA(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
-void ScaleFilterCols_Any_MSA(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
-void ScaleRowDown34_Any_MSA(const uint8* src_ptr,
+void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_0_Box_Any_MSA(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-void ScaleRowDown34_1_Box_Any_MSA(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
#ifdef __cplusplus
diff --git a/chromium/third_party/libyuv/include/libyuv/version.h b/chromium/third_party/libyuv/include/libyuv/version.h
index 97595e58ffc..683ac0482b4 100644
--- a/chromium/third_party/libyuv/include/libyuv/version.h
+++ b/chromium/third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1688
+#define LIBYUV_VERSION 1698
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/chromium/third_party/libyuv/include/libyuv/video_common.h b/chromium/third_party/libyuv/include/libyuv/video_common.h
index e3c180f167d..bcef378b5a4 100644
--- a/chromium/third_party/libyuv/include/libyuv/video_common.h
+++ b/chromium/third_party/libyuv/include/libyuv/video_common.h
@@ -28,13 +28,13 @@ extern "C" {
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
#ifdef __cplusplus
-#define FOURCC(a, b, c, d) \
- ((static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
- (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
+#define FOURCC(a, b, c, d) \
+ ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
+ (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24))
#else
-#define FOURCC(a, b, c, d) \
- (((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
- ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
+#define FOURCC(a, b, c, d) \
+ (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \
+ ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */
#endif
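For reference, a minimal sketch of the packing (illustrative, not part of the change): the first argument lands in the low byte, so the tag reads back as the character sequence in increasing address order.

/* Sketch: the same packing as the FOURCC macro above. */
#include <assert.h>
#include <stdint.h>

#define FOURCC(a, b, c, d)                             \
  (((uint32_t)(a)) | ((uint32_t)(b) << 8) |            \
   ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))

int main(void) {
  /* 'N' = 0x4E, 'V' = 0x56, '1' = 0x31, '2' = 0x32 */
  assert(FOURCC('N', 'V', '1', '2') == 0x3231564Eu);
  return 0;
}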
// Some pages discussing FourCC codes:
@@ -63,11 +63,12 @@ enum FourCC {
// 1 Secondary YUV format: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
- // 10 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
+ // 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 2 10 bpc
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
+ FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
@@ -137,6 +138,7 @@ enum FourCCBpp {
FOURCC_BPP_ABGR = 32,
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_AR30 = 32,
+ FOURCC_BPP_AB30 = 32,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,
@@ -176,7 +178,7 @@ enum FourCCBpp {
};
// Converts fourcc aliases into canonical ones.
-LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
+LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc);
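A usage sketch (hedged: the alias table lives in video_common.cc, and treating IYUV as an alias of I420 is an assumption about that table rather than something shown in this diff):

/* Sketch: normalize an alias tag before choosing a converter path. */
#include <stdint.h>
#include "libyuv/video_common.h"

int main(void) {
  uint32_t canon = CanonicalFourCC(FOURCC('I', 'Y', 'U', 'V'));
  /* Expected: canon == FOURCC('I', '4', '2', '0') if IYUV maps to I420. */
  return canon == FOURCC('I', '4', '2', '0') ? 0 : 1;
}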
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libyuv/source/compare.cc b/chromium/third_party/libyuv/source/compare.cc
index 8c379b59cb8..50e3abd0556 100644
--- a/chromium/third_party/libyuv/source/compare.cc
+++ b/chromium/third_party/libyuv/source/compare.cc
@@ -29,10 +29,11 @@ extern "C" {
// hash seed of 5381 recommended.
LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
+uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder;
- uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
+ uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) =
+ HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41;
@@ -44,7 +45,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
}
#endif
- while (count >= (uint64)(kBlockSize)) {
+ while (count >= (uint64_t)(kBlockSize)) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
@@ -62,7 +63,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
return seed;
}
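All of the dispatched row implementations compute the classic djb2 recurrence, hash = hash * 33 + byte. A reference sketch (not part of the change) also shows why the 32 KB blocking above is harmless: each block's result becomes the next call's seed, so the chained result equals a single pass over the whole buffer.

#include <stddef.h>
#include <stdint.h>

/* Reference djb2; a seed of 5381 is the conventional starting value. */
static uint32_t Djb2Reference(const uint8_t* src, size_t count,
                              uint32_t seed) {
  uint32_t hash = seed;
  size_t i;
  for (i = 0; i < count; ++i) {
    hash += (hash << 5) + src[i]; /* hash = hash * 33 + src[i] */
  }
  return hash;
}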
-static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
+static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
@@ -93,8 +94,11 @@ static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
-uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
- uint32 fourcc = 0;
+uint32_t ARGBDetect(const uint8_t* argb,
+ int stride_argb,
+ int width,
+ int height) {
+ uint32_t fourcc = 0;
int h;
// Coalesce rows.
@@ -114,17 +118,17 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
// So actual maximum is 1 less loop, which is 65536 - 32 bytes.
LIBYUV_API
-uint64 ComputeHammingDistance(const uint8* src_a,
- const uint8* src_b,
- int count) {
+uint64_t ComputeHammingDistance(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
const int kBlockSize = 1 << 15; // 32768;
const int kSimdSize = 64;
// SIMD for multiple of 64, and C for remainder
int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
- uint64 diff = 0;
+ uint64_t diff = 0;
int i;
- uint32 (*HammingDistance)(const uint8* src_a, const uint8* src_b, int count) =
- HammingDistance_C;
+ uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b,
+ int count) = HammingDistance_C;
#if defined(HAS_HAMMINGDISTANCE_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
HammingDistance = HammingDistance_NEON;
@@ -172,18 +176,18 @@ uint64 ComputeHammingDistance(const uint8* src_a,
// TODO(fbarchard): Refactor into row function.
LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a,
- const uint8* src_b,
- int count) {
+uint64_t ComputeSumSquareError(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
// SumSquareError returns values 0 to 65535 for each squared difference.
- // Up to 65536 of those can be summed and remain within a uint32.
- // After each block of 65536 pixels, accumulate into a uint64.
+ // Up to 65536 of those can be summed and remain within a uint32_t.
+ // After each block of 65536 pixels, accumulate into a uint64_t.
const int kBlockSize = 65536;
int remainder = count & (kBlockSize - 1) & ~31;
- uint64 sse = 0;
+ uint64_t sse = 0;
int i;
- uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
- SumSquareError_C;
+ uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
+ int count) = SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SumSquareError = SumSquareError_NEON;
@@ -227,13 +231,13 @@ uint64 ComputeSumSquareError(const uint8* src_a,
}
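The 65536-pixel block size above is safe: the largest squared byte difference is 255 * 255 = 65025, and 65536 * 65025 = 4,261,478,400, which is below 2^32 = 4,294,967,296. Doubling the block to 131072 maximal pixels would overflow a uint32_t (about 8.5e9), so 65536 is the largest power-of-two block guaranteed to fit, hence the per-block spill into a uint64_t.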
LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a,
- int stride_a,
- const uint8* src_b,
- int stride_b,
- int width,
- int height) {
- uint64 sse = 0;
+uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height) {
+ uint64_t sse = 0;
int h;
// Coalesce rows.
if (stride_a == width && stride_b == width) {
@@ -250,7 +254,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a,
}
LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
+double SumSquareErrorToPsnr(uint64_t sse, uint64_t count) {
double psnr;
if (sse > 0) {
double mse = (double)count / (double)sse;
@@ -259,65 +263,67 @@ double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
psnr = kMaxPsnr; // Limit to prevent divide by 0
}
- if (psnr > kMaxPsnr)
+ if (psnr > kMaxPsnr) {
psnr = kMaxPsnr;
+ }
return psnr;
}
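Spelled out (a sketch: the 255^2 peak factor sits on a line elided by the hunk above, and note that the local mse actually holds count/sse, the reciprocal of the mean squared error), the function computes PSNR = 10 * log10(255^2 * count / sse), capped at kMaxPsnr:

#include <math.h>
#include <stdint.h>

/* Reference PSNR; the 128.0 cap mirrors libyuv's kMaxPsnr. */
static double PsnrReference(uint64_t sse, uint64_t count) {
  const double kMaxPsnr = 128.0;
  double psnr;
  if (sse == 0) {
    return kMaxPsnr; /* identical planes: avoid log of infinity */
  }
  psnr = 10.0 * log10(255.0 * 255.0 * (double)count / (double)sse);
  return psnr > kMaxPsnr ? kMaxPsnr : psnr;
}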
LIBYUV_API
-double CalcFramePsnr(const uint8* src_a,
+double CalcFramePsnr(const uint8_t* src_a,
int stride_a,
- const uint8* src_b,
+ const uint8_t* src_b,
int stride_b,
int width,
int height) {
- const uint64 samples = width * height;
- const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b,
- stride_b, width, height);
+ const uint64_t samples = (uint64_t)width * (uint64_t)height;
+ const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b,
+ stride_b, width, height);
return SumSquareErrorToPsnr(sse, samples);
}
LIBYUV_API
-double I420Psnr(const uint8* src_y_a,
+double I420Psnr(const uint8_t* src_y_a,
int stride_y_a,
- const uint8* src_u_a,
+ const uint8_t* src_u_a,
int stride_u_a,
- const uint8* src_v_a,
+ const uint8_t* src_v_a,
int stride_v_a,
- const uint8* src_y_b,
+ const uint8_t* src_y_b,
int stride_y_b,
- const uint8* src_u_b,
+ const uint8_t* src_u_b,
int stride_u_b,
- const uint8* src_v_b,
+ const uint8_t* src_v_b,
int stride_v_b,
int width,
int height) {
- const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, src_y_b,
- stride_y_b, width, height);
+ const uint64_t sse_y = ComputeSumSquareErrorPlane(
+ src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
const int width_uv = (width + 1) >> 1;
const int height_uv = (height + 1) >> 1;
- const uint64 sse_u = ComputeSumSquareErrorPlane(
+ const uint64_t sse_u = ComputeSumSquareErrorPlane(
src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv);
- const uint64 sse_v = ComputeSumSquareErrorPlane(
+ const uint64_t sse_v = ComputeSumSquareErrorPlane(
src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv);
- const uint64 samples = width * height + 2 * (width_uv * height_uv);
- const uint64 sse = sse_y + sse_u + sse_v;
+ const uint64_t samples = (uint64_t)width * (uint64_t)height +
+ 2 * ((uint64_t)width_uv * (uint64_t)height_uv);
+ const uint64_t sse = sse_y + sse_u + sse_v;
return SumSquareErrorToPsnr(sse, samples);
}
-static const int64 cc1 = 26634; // (64^2*(.01*255)^2
-static const int64 cc2 = 239708; // (64^2*(.03*255)^2
+static const int64_t cc1 = 26634; // (64^2*(.01*255)^2)
+static const int64_t cc2 = 239708; // (64^2*(.03*255)^2)
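For reference, the arithmetic behind these constants: they are the standard SSIM stabilizers k1 = 0.01 and k2 = 0.03 at peak L = 255, pre-scaled by the 8x8 window's squared pixel count, 64^2 = 4096. That gives cc1 = 4096 * (0.01 * 255)^2 = 4096 * 6.5025 ≈ 26634 and cc2 = 4096 * (0.03 * 255)^2 = 4096 * 58.5225 ≈ 239708. The (cc * count * count) >> 12 step in Ssim8x8_C below is an exact no-op for count = 64 (4096 * 4096 / 4096), keeping c1 and c2 on the same scale as the raw pixel sums.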
-static double Ssim8x8_C(const uint8* src_a,
+static double Ssim8x8_C(const uint8_t* src_a,
int stride_a,
- const uint8* src_b,
+ const uint8_t* src_b,
int stride_b) {
- int64 sum_a = 0;
- int64 sum_b = 0;
- int64 sum_sq_a = 0;
- int64 sum_sq_b = 0;
- int64 sum_axb = 0;
+ int64_t sum_a = 0;
+ int64_t sum_b = 0;
+ int64_t sum_sq_a = 0;
+ int64_t sum_sq_b = 0;
+ int64_t sum_axb = 0;
int i;
for (i = 0; i < 8; ++i) {
@@ -335,20 +341,20 @@ static double Ssim8x8_C(const uint8* src_a,
}
{
- const int64 count = 64;
+ const int64_t count = 64;
// scale the constants by number of pixels
- const int64 c1 = (cc1 * count * count) >> 12;
- const int64 c2 = (cc2 * count * count) >> 12;
+ const int64_t c1 = (cc1 * count * count) >> 12;
+ const int64_t c2 = (cc2 * count * count) >> 12;
- const int64 sum_a_x_sum_b = sum_a * sum_b;
+ const int64_t sum_a_x_sum_b = sum_a * sum_b;
- const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
- (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
+ const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) *
+ (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
- const int64 sum_a_sq = sum_a * sum_a;
- const int64 sum_b_sq = sum_b * sum_b;
+ const int64_t sum_a_sq = sum_a * sum_a;
+ const int64_t sum_b_sq = sum_b * sum_b;
- const int64 ssim_d =
+ const int64_t ssim_d =
(sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
@@ -363,15 +369,15 @@ static double Ssim8x8_C(const uint8* src_a,
// on the 4x4 pixel grid. Such an arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
LIBYUV_API
-double CalcFrameSsim(const uint8* src_a,
+double CalcFrameSsim(const uint8_t* src_a,
int stride_a,
- const uint8* src_b,
+ const uint8_t* src_b,
int stride_b,
int width,
int height) {
int samples = 0;
double ssim_total = 0;
- double (*Ssim8x8)(const uint8* src_a, int stride_a, const uint8* src_b,
+ double (*Ssim8x8)(const uint8_t* src_a, int stride_a, const uint8_t* src_b,
int stride_b) = Ssim8x8_C;
// sample points start at each 4x4 location
@@ -392,17 +398,17 @@ double CalcFrameSsim(const uint8* src_a,
}
LIBYUV_API
-double I420Ssim(const uint8* src_y_a,
+double I420Ssim(const uint8_t* src_y_a,
int stride_y_a,
- const uint8* src_u_a,
+ const uint8_t* src_u_a,
int stride_u_a,
- const uint8* src_v_a,
+ const uint8_t* src_v_a,
int stride_v_a,
- const uint8* src_y_b,
+ const uint8_t* src_y_b,
int stride_y_b,
- const uint8* src_u_b,
+ const uint8_t* src_u_b,
int stride_u_b,
- const uint8* src_v_b,
+ const uint8_t* src_v_b,
int stride_v_b,
int width,
int height) {
diff --git a/chromium/third_party/libyuv/source/compare_common.cc b/chromium/third_party/libyuv/source/compare_common.cc
index 83564a1bcb5..633466addb5 100644
--- a/chromium/third_party/libyuv/source/compare_common.cc
+++ b/chromium/third_party/libyuv/source/compare_common.cc
@@ -18,8 +18,10 @@ extern "C" {
#endif
#if ORIGINAL_OPT
-uint32 HammingDistance_C1(const uint8* src_a, const uint8* src_b, int count) {
- uint32 diff = 0u;
+uint32_t HammingDistance_C1(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
int i;
for (i = 0; i < count; ++i) {
@@ -46,13 +48,15 @@ uint32 HammingDistance_C1(const uint8* src_a, const uint8* src_b, int count) {
#endif
// Hakmem method for hamming distance.
-uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
- uint32 diff = 0u;
+uint32_t HammingDistance_C(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
int i;
for (i = 0; i < count - 3; i += 4) {
- uint32 x = *((uint32*)src_a) ^ *((uint32*)src_b);
- uint32 u = x - ((x >> 1) & 0x55555555);
+ uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
+ uint32_t u = x - ((x >> 1) & 0x55555555);
u = ((u >> 2) & 0x33333333) + (u & 0x33333333);
diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24);
src_a += 4;
@@ -60,8 +64,8 @@ uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
}
for (; i < count; ++i) {
- uint32 x = *src_a ^ *src_b;
- uint32 u = x - ((x >> 1) & 0x55);
+ uint32_t x = *src_a ^ *src_b;
+ uint32_t u = x - ((x >> 1) & 0x55);
u = ((u >> 2) & 0x33) + (u & 0x33);
diff += (u + (u >> 4)) & 0x0f;
src_a += 1;
@@ -71,20 +75,22 @@ uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
return diff;
}
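The word loop above is the classic Hakmem/SWAR population count. A self-check sketch (assuming a GCC/Clang-style __builtin_popcount; illustrative, not part of the change):

#include <assert.h>
#include <stdint.h>

/* The same SWAR steps as HammingDistance_C, isolated for one word. */
static uint32_t HakmemPopcount(uint32_t x) {
  uint32_t u = x - ((x >> 1) & 0x55555555);       /* 2-bit partial sums */
  u = ((u >> 2) & 0x33333333) + (u & 0x33333333); /* 4-bit partial sums */
  return (((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; /* byte total */
}

int main(void) {
  uint32_t x;
  for (x = 0; x < 1000000u; ++x) {
    assert(HakmemPopcount(x) == (uint32_t)__builtin_popcount(x));
  }
  return 0;
}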
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse = 0u;
+uint32_t SumSquareError_C(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse = 0u;
int i;
for (i = 0; i < count; ++i) {
int diff = src_a[i] - src_b[i];
- sse += (uint32)(diff * diff);
+ sse += (uint32_t)(diff * diff);
}
return sse;
}
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
- uint32 hash = seed;
+uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) {
+ uint32_t hash = seed;
int i;
for (i = 0; i < count; ++i) {
hash += (hash << 5) + src[i];
diff --git a/chromium/third_party/libyuv/source/compare_gcc.cc b/chromium/third_party/libyuv/source/compare_gcc.cc
index 49b471af1a0..676527c1b1b 100644
--- a/chromium/third_party/libyuv/source/compare_gcc.cc
+++ b/chromium/third_party/libyuv/source/compare_gcc.cc
@@ -23,10 +23,10 @@ extern "C" {
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#if defined(__x86_64__)
-uint32 HammingDistance_SSE42(const uint8* src_a,
- const uint8* src_b,
- int count) {
- uint64 diff = 0u;
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint64_t diff = 0u;
asm volatile(
"xor %3,%3 \n"
@@ -68,13 +68,13 @@ uint32 HammingDistance_SSE42(const uint8* src_a,
:
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
- return static_cast<uint32>(diff);
+ return static_cast<uint32_t>(diff);
}
#else
-uint32 HammingDistance_SSE42(const uint8* src_a,
- const uint8* src_b,
- int count) {
- uint32 diff = 0u;
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
asm volatile(
// Process 16 bytes per loop.
@@ -115,10 +115,10 @@ static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15};
static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
-uint32 HammingDistance_SSSE3(const uint8* src_a,
- const uint8* src_b,
- int count) {
- uint32 diff = 0u;
+uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
asm volatile(
"movdqa %4,%%xmm2 \n"
@@ -174,8 +174,10 @@ uint32 HammingDistance_SSSE3(const uint8* src_a,
}
#ifdef HAS_HAMMINGDISTANCE_AVX2
-uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
- uint32 diff = 0u;
+uint32_t HammingDistance_AVX2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
asm volatile(
"vbroadcastf128 %4,%%ymm2 \n"
@@ -227,43 +229,46 @@ uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
}
#endif // HAS_HAMMINGDISTANCE_AVX2
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse;
- asm volatile (
- "pxor %%xmm0,%%xmm0 \n"
- "pxor %%xmm5,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "lea " MEMLEA(0x10, 0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "lea " MEMLEA(0x10, 1) ",%1 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psubusb %%xmm2,%%xmm1 \n"
- "psubusb %%xmm3,%%xmm2 \n"
- "por %%xmm2,%%xmm1 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpckhbw %%xmm5,%%xmm2 \n"
- "pmaddwd %%xmm1,%%xmm1 \n"
- "pmaddwd %%xmm2,%%xmm2 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
+uint32_t SumSquareError_SSE2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse;
+ asm volatile(
+ "pxor %%xmm0,%%xmm0 \n"
+ "pxor %%xmm5,%%xmm5 \n"
- "pshufd $0xee,%%xmm0,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "pshufd $0x1,%%xmm0,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "movd %%xmm0,%3 \n"
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm2 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "psubusb %%xmm2,%%xmm1 \n"
+ "psubusb %%xmm3,%%xmm2 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpckhbw %%xmm5,%%xmm2 \n"
+ "pmaddwd %%xmm1,%%xmm1 \n"
+ "pmaddwd %%xmm2,%%xmm2 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "paddd %%xmm2,%%xmm0 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
- : "+r"(src_a), // %0
- "+r"(src_b), // %1
- "+r"(count), // %2
- "=g"(sse) // %3
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ "pshufd $0xee,%%xmm0,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "pshufd $0x1,%%xmm0,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "movd %%xmm0,%3 \n"
+
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=g"(sse) // %3
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
return sse;
}
@@ -293,58 +298,58 @@ static const uvec32 kHashMul3 = {
0x00000001, // 33 ^ 0
};
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
- uint32 hash;
- asm volatile (
- "movd %2,%%xmm0 \n"
- "pxor %%xmm7,%%xmm7 \n"
- "movdqa %4,%%xmm6 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "lea " MEMLEA(0x10, 0) ",%0 \n"
- "pmulld %%xmm6,%%xmm0 \n"
- "movdqa %5,%%xmm5 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm7,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm7,%%xmm3 \n"
- "pmulld %%xmm5,%%xmm3 \n"
- "movdqa %6,%%xmm5 \n"
- "movdqa %%xmm2,%%xmm4 \n"
- "punpckhwd %%xmm7,%%xmm4 \n"
- "pmulld %%xmm5,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "punpckhbw %%xmm7,%%xmm1 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklwd %%xmm7,%%xmm2 \n"
- "pmulld %%xmm5,%%xmm2 \n"
- "movdqa %8,%%xmm5 \n"
- "punpckhwd %%xmm7,%%xmm1 \n"
- "pmulld %%xmm5,%%xmm1 \n"
- "paddd %%xmm4,%%xmm3 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm1 \n"
- "pshufd $0xe,%%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "pshufd $0x1,%%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "sub $0x10,%1 \n"
- "jg 1b \n"
- "movd %%xmm0,%3 \n"
- : "+r"(src), // %0
- "+r"(count), // %1
- "+rm"(seed), // %2
- "=g"(hash) // %3
- : "m"(kHash16x33), // %4
- "m"(kHashMul0), // %5
- "m"(kHashMul1), // %6
- "m"(kHashMul2), // %7
- "m"(kHashMul3) // %8
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
+ uint32_t hash;
+ asm volatile(
+ "movd %2,%%xmm0 \n"
+ "pxor %%xmm7,%%xmm7 \n"
+ "movdqa %4,%%xmm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "pmulld %%xmm6,%%xmm0 \n"
+ "movdqa %5,%%xmm5 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm7,%%xmm3 \n"
+ "pmulld %%xmm5,%%xmm3 \n"
+ "movdqa %6,%%xmm5 \n"
+ "movdqa %%xmm2,%%xmm4 \n"
+ "punpckhwd %%xmm7,%%xmm4 \n"
+ "pmulld %%xmm5,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "punpckhbw %%xmm7,%%xmm1 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklwd %%xmm7,%%xmm2 \n"
+ "pmulld %%xmm5,%%xmm2 \n"
+ "movdqa %8,%%xmm5 \n"
+ "punpckhwd %%xmm7,%%xmm1 \n"
+ "pmulld %%xmm5,%%xmm1 \n"
+ "paddd %%xmm4,%%xmm3 \n"
+ "paddd %%xmm2,%%xmm1 \n"
+ "paddd %%xmm3,%%xmm1 \n"
+ "pshufd $0xe,%%xmm1,%%xmm2 \n"
+ "paddd %%xmm2,%%xmm1 \n"
+ "pshufd $0x1,%%xmm1,%%xmm2 \n"
+ "paddd %%xmm2,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "sub $0x10,%1 \n"
+ "jg 1b \n"
+ "movd %%xmm0,%3 \n"
+ : "+r"(src), // %0
+ "+r"(count), // %1
+ "+rm"(seed), // %2
+ "=g"(hash) // %3
+ : "m"(kHash16x33), // %4
+ "m"(kHashMul0), // %5
+ "m"(kHashMul1), // %6
+ "m"(kHashMul2), // %7
+ "m"(kHashMul3) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
diff --git a/chromium/third_party/libyuv/source/compare_msa.cc b/chromium/third_party/libyuv/source/compare_msa.cc
index 57857cf5127..0b807d37bee 100644
--- a/chromium/third_party/libyuv/source/compare_msa.cc
+++ b/chromium/third_party/libyuv/source/compare_msa.cc
@@ -22,8 +22,10 @@ namespace libyuv {
extern "C" {
#endif
-uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count) {
- uint32 diff = 0u;
+uint32_t HammingDistance_MSA(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
int i;
v16u8 src0, src1, src2, src3;
v2i64 vec0 = {0}, vec1 = {0};
@@ -42,13 +44,15 @@ uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count) {
}
vec0 += vec1;
- diff = (uint32)__msa_copy_u_w((v4i32)vec0, 0);
- diff += (uint32)__msa_copy_u_w((v4i32)vec0, 2);
+ diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0);
+ diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2);
return diff;
}
-uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse = 0u;
+uint32_t SumSquareError_MSA(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse = 0u;
int i;
v16u8 src0, src1, src2, src3;
v8i16 vec0, vec1, vec2, vec3;
@@ -80,8 +84,8 @@ uint32 SumSquareError_MSA(const uint8* src_a, const uint8* src_b, int count) {
reg2 += reg3;
reg0 += reg2;
tmp0 = __msa_hadd_s_d(reg0, reg0);
- sse = (uint32)__msa_copy_u_w((v4i32)tmp0, 0);
- sse += (uint32)__msa_copy_u_w((v4i32)tmp0, 2);
+ sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0);
+ sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2);
return sse;
}
diff --git a/chromium/third_party/libyuv/source/compare_neon.cc b/chromium/third_party/libyuv/source/compare_neon.cc
index 5dfa71edcbf..2a2181e0cb3 100644
--- a/chromium/third_party/libyuv/source/compare_neon.cc
+++ b/chromium/third_party/libyuv/source/compare_neon.cc
@@ -23,8 +23,10 @@ extern "C" {
// 256 bits at a time
// uses a short accumulator which restricts count to 131 KB
-uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
- uint32 diff;
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff;
asm volatile(
"vmov.u16 q4, #0 \n" // accumulator
@@ -52,8 +54,10 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
return diff;
}
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse;
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse;
asm volatile(
"vmov.u8 q8, #0 \n"
"vmov.u8 q10, #0 \n"
diff --git a/chromium/third_party/libyuv/source/compare_neon64.cc b/chromium/third_party/libyuv/source/compare_neon64.cc
index ddf98fa68b2..6e8f672ab73 100644
--- a/chromium/third_party/libyuv/source/compare_neon64.cc
+++ b/chromium/third_party/libyuv/source/compare_neon64.cc
@@ -22,8 +22,10 @@ extern "C" {
// 256 bits at a time
// uses a short accumulator which restricts count to 131 KB
-uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
- uint32 diff;
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff;
asm volatile(
"movi v4.8h, #0 \n"
@@ -47,8 +49,10 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
return diff;
}
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse;
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse;
asm volatile(
"eor v16.16b, v16.16b, v16.16b \n"
"eor v18.16b, v18.16b, v18.16b \n"
diff --git a/chromium/third_party/libyuv/source/compare_win.cc b/chromium/third_party/libyuv/source/compare_win.cc
index bcd6a88ebbb..d57d3d9d1c8 100644
--- a/chromium/third_party/libyuv/source/compare_win.cc
+++ b/chromium/third_party/libyuv/source/compare_win.cc
@@ -25,14 +25,14 @@ extern "C" {
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-uint32 HammingDistance_SSE42(const uint8* src_a,
- const uint8* src_b,
- int count) {
- uint32 diff = 0u;
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
int i;
for (i = 0; i < count - 3; i += 4) {
- uint32 x = *((uint32*)src_a) ^ *((uint32*)src_b);
+ uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
src_a += 4;
src_b += 4;
diff += __popcnt(x);
@@ -40,8 +40,8 @@ uint32 HammingDistance_SSE42(const uint8* src_a,
return diff;
}
-__declspec(naked) uint32
- SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
+__declspec(naked) uint32_t
+ SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
@@ -81,8 +81,8 @@ __declspec(naked) uint32
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable : 4752)
-__declspec(naked) uint32
- SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
+__declspec(naked) uint32_t
+ SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
@@ -146,8 +146,8 @@ uvec32 kHashMul3 = {
0x00000001, // 33 ^ 0
};
-__declspec(naked) uint32
- HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
+__declspec(naked) uint32_t
+ HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
@@ -197,8 +197,8 @@ __declspec(naked) uint32
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
-__declspec(naked) uint32
- HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
+__declspec(naked) uint32_t
+ HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
diff --git a/chromium/third_party/libyuv/source/convert.cc b/chromium/third_party/libyuv/source/convert.cc
index fd2066e29eb..375cc732c1d 100644
--- a/chromium/third_party/libyuv/source/convert.cc
+++ b/chromium/third_party/libyuv/source/convert.cc
@@ -28,17 +28,17 @@ static __inline int Abs(int v) {
}
// Any I4xx To I420 format with mirroring.
-static int I4xxToI420(const uint8* src_y,
+static int I4xxToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int src_y_width,
int src_y_height,
@@ -66,17 +66,17 @@ static int I4xxToI420(const uint8* src_y,
// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
// it does row coalescing.
LIBYUV_API
-int I420Copy(const uint8* src_y,
+int I420Copy(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -108,17 +108,17 @@ int I420Copy(const uint8* src_y,
// Copy I010 with optional flipping.
LIBYUV_API
-int I010Copy(const uint16* src_y,
+int I010Copy(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
- uint16* dst_u,
+ uint16_t* dst_u,
int dst_stride_u,
- uint16* dst_v,
+ uint16_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -150,17 +150,17 @@ int I010Copy(const uint16* src_y,
// Convert 10 bit YUV to 8 bit.
LIBYUV_API
-int I010ToI420(const uint16* src_y,
+int I010ToI420(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -195,17 +195,17 @@ int I010ToI420(const uint16* src_y,
// 422 chroma is 1/2 width, 1x height
// 420 chroma is 1/2 width, 1/2 height
LIBYUV_API
-int I422ToI420(const uint8* src_y,
+int I422ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -218,17 +218,17 @@ int I422ToI420(const uint8* src_y,
// 444 chroma is 1x width, 1x height
// 420 chroma is 1/2 width, 1/2 height
LIBYUV_API
-int I444ToI420(const uint8* src_y,
+int I444ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -239,13 +239,13 @@ int I444ToI420(const uint8* src_y,
// I400 is greyscale, typically used in MJPG
LIBYUV_API
-int I400ToI420(const uint8* src_y,
+int I400ToI420(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -269,15 +269,15 @@ int I400ToI420(const uint8* src_y,
return 0;
}
-static void CopyPlane2(const uint8* src,
+static void CopyPlane2(const uint8_t* src,
int src_stride_0,
int src_stride_1,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
int y;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+ void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
@@ -320,16 +320,16 @@ static void CopyPlane2(const uint8* src,
// src_stride_m420 is row planar. Normally this will be the width in pixels.
// The UV plane is half width, but 2 values, so src_stride_m420 applies to
// this as well as the two Y planes.
-static int X420ToI420(const uint8* src_y,
+static int X420ToI420(const uint8_t* src_y,
int src_stride_y0,
int src_stride_y1,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -384,15 +384,15 @@ static int X420ToI420(const uint8* src_y,
// Convert NV12 to I420.
LIBYUV_API
-int NV12ToI420(const uint8* src_y,
+int NV12ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -403,15 +403,15 @@ int NV12ToI420(const uint8* src_y,
// Convert NV21 to I420. Same as NV12 but u and v pointers swapped.
LIBYUV_API
-int NV21ToI420(const uint8* src_y,
+int NV21ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_vu,
+ const uint8_t* src_vu,
int src_stride_vu,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -422,13 +422,13 @@ int NV21ToI420(const uint8* src_y,
// Convert M420 to I420.
LIBYUV_API
-int M420ToI420(const uint8* src_m420,
+int M420ToI420(const uint8_t* src_m420,
int src_stride_m420,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -440,20 +440,21 @@ int M420ToI420(const uint8* src_m420,
// Convert YUY2 to I420.
LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2,
+int YUY2ToI420(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_u,
- uint8* dst_v, int width) = YUY2ToUVRow_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
+ void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ YUY2ToUVRow_C;
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
YUY2ToYRow_C;
// Negative height means invert the image.
if (height < 0) {
@@ -520,20 +521,21 @@ int YUY2ToI420(const uint8* src_yuy2,
// Convert UYVY to I420.
LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy,
+int UYVYToI420(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_u,
- uint8* dst_v, int width) = UYVYToUVRow_C;
- void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int width) =
+ void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ UYVYToUVRow_C;
+ void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
UYVYToYRow_C;
// Negative height means invert the image.
if (height < 0) {
@@ -600,20 +602,21 @@ int UYVYToI420(const uint8* src_uyvy,
// Convert ARGB to I420.
LIBYUV_API
-int ARGBToI420(const uint8* src_argb,
+int ARGBToI420(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -695,20 +698,21 @@ int ARGBToI420(const uint8* src_argb,
// Convert BGRA to I420.
LIBYUV_API
-int BGRAToI420(const uint8* src_bgra,
+int BGRAToI420(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, uint8* dst_u,
- uint8* dst_v, int width) = BGRAToUVRow_C;
- void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int width) =
+ void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ BGRAToUVRow_C;
+ void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) =
BGRAToYRow_C;
if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -780,20 +784,21 @@ int BGRAToI420(const uint8* src_bgra,
// Convert ABGR to I420.
LIBYUV_API
-int ABGRToI420(const uint8* src_abgr,
+int ABGRToI420(const uint8_t* src_abgr,
int src_stride_abgr,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, uint8* dst_u,
- uint8* dst_v, int width) = ABGRToUVRow_C;
- void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int width) =
+ void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ABGRToUVRow_C;
+ void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
ABGRToYRow_C;
if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -865,20 +870,21 @@ int ABGRToI420(const uint8* src_abgr,
// Convert RGBA to I420.
LIBYUV_API
-int RGBAToI420(const uint8* src_rgba,
+int RGBAToI420(const uint8_t* src_rgba,
int src_stride_rgba,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, uint8* dst_u,
- uint8* dst_v, int width) = RGBAToUVRow_C;
- void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int width) =
+ void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RGBAToUVRow_C;
+ void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) =
RGBAToYRow_C;
if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -950,28 +956,30 @@ int RGBAToI420(const uint8* src_rgba,
// Convert RGB24 to I420.
LIBYUV_API
-int RGB24ToI420(const uint8* src_rgb24,
+int RGB24ToI420(const uint8_t* src_rgb24,
int src_stride_rgb24,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
- void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
- void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int width) =
+ void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RGB24ToUVRow_C;
+ void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
RGB24ToYRow_C;
#else
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
+ void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@@ -1080,28 +1088,29 @@ int RGB24ToI420(const uint8* src_rgb24,
// Convert RAW to I420.
LIBYUV_API
-int RAWToI420(const uint8* src_raw,
+int RAWToI420(const uint8_t* src_raw,
int src_stride_raw,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA))
- void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw, uint8* dst_u,
- uint8* dst_v, int width) = RAWToUVRow_C;
- void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int width) =
+ void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = RAWToUVRow_C;
+ void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYRow_C;
#else
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
+ void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@@ -1210,29 +1219,30 @@ int RAWToI420(const uint8* src_raw,
// Convert RGB565 to I420.
LIBYUV_API
-int RGB565ToI420(const uint8* src_rgb565,
+int RGB565ToI420(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))
- void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int width) =
+ void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB565ToUVRow_C;
- void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int width) =
+ void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) =
RGB565ToYRow_C;
#else
- void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
- RGB565ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
+ int width) = RGB565ToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@@ -1347,29 +1357,30 @@ int RGB565ToI420(const uint8* src_rgb565,
// Convert ARGB1555 to I420.
LIBYUV_API
-int ARGB1555ToI420(const uint8* src_argb1555,
+int ARGB1555ToI420(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA))
- void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int width) =
+ void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGB1555ToUVRow_C;
- void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int width) =
- ARGB1555ToYRow_C;
+ void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y,
+ int width) = ARGB1555ToYRow_C;
#else
- void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
- ARGB1555ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
+ int width) = ARGB1555ToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 ||
@@ -1488,29 +1499,30 @@ int ARGB1555ToI420(const uint8* src_argb1555,
// Convert ARGB4444 to I420.
LIBYUV_API
-int ARGB4444ToI420(const uint8* src_argb4444,
+int ARGB4444ToI420(const uint8_t* src_argb4444,
int src_stride_argb4444,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
#if defined(HAS_ARGB4444TOYROW_NEON)
- void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int width) =
+ void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGB4444ToUVRow_C;
- void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int width) =
- ARGB4444ToYRow_C;
+ void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y,
+ int width) = ARGB4444ToYRow_C;
#else
- void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
- ARGB4444ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
+ int width) = ARGB4444ToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_argb4444 || !dst_y || !dst_u || !dst_v || width <= 0 ||
@@ -1639,9 +1651,9 @@ int ARGB4444ToI420(const uint8* src_argb4444,
return 0;
}
-static void SplitPixels(const uint8* src_u,
+static void SplitPixels(const uint8_t* src_u,
int src_pixel_stride_uv,
- uint8* dst_u,
+ uint8_t* dst_u,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -1653,18 +1665,18 @@ static void SplitPixels(const uint8* src_u,
// Convert Android420 to I420.
LIBYUV_API
-int Android420ToI420(const uint8* src_y,
+int Android420ToI420(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -1697,14 +1709,15 @@ int Android420ToI420(const uint8* src_y,
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
return 0;
// Split UV planes - NV21
- } else if (src_pixel_stride_uv == 2 && vu_off == -1 &&
- src_stride_u == src_stride_v) {
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == -1 &&
+ src_stride_u == src_stride_v) {
SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u,
halfwidth, halfheight);
return 0;
// Split UV planes - NV12
- } else if (src_pixel_stride_uv == 2 && vu_off == 1 &&
- src_stride_u == src_stride_v) {
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) {
SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v,
halfwidth, halfheight);
return 0;
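
The three early returns above classify Android's flexible YUV_420_888 layout by the chroma pixel stride and the signed distance between the U and V pointers. A standalone sketch of that classification (the enum and helper are hypothetical, written only to illustrate the dispatch):

#include <stddef.h>
#include <stdint.h>

enum Android420Layout { kLayoutI420, kLayoutNV21, kLayoutNV12, kLayoutOther };

// Hypothetical helper mirroring the checks in Android420ToI420.
static enum Android420Layout ClassifyAndroid420(const uint8_t* src_u,
                                                const uint8_t* src_v,
                                                int src_stride_u,
                                                int src_stride_v,
                                                int src_pixel_stride_uv) {
  ptrdiff_t vu_off = src_v - src_u;
  if (src_pixel_stride_uv == 1) {
    return kLayoutI420;  // Planar chroma; handled with CopyPlane.
  }
  if (src_pixel_stride_uv == 2 && src_stride_u == src_stride_v) {
    if (vu_off == -1) return kLayoutNV21;  // V first; SplitUVPlane swapped.
    if (vu_off == 1) return kLayoutNV12;   // U first; SplitUVPlane direct.
  }
  return kLayoutOther;  // Falls through to per-pixel SplitPixels.
}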
diff --git a/chromium/third_party/libyuv/source/convert_argb.cc b/chromium/third_party/libyuv/source/convert_argb.cc
index 9b93fc15194..e084f680680 100644
--- a/chromium/third_party/libyuv/source/convert_argb.cc
+++ b/chromium/third_party/libyuv/source/convert_argb.cc
@@ -26,9 +26,9 @@ extern "C" {
// Copy ARGB with optional flipping
LIBYUV_API
-int ARGBCopy(const uint8* src_argb,
+int ARGBCopy(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -48,20 +48,20 @@ int ARGBCopy(const uint8* src_argb,
}
// Convert I420 to ARGB with matrix
-static int I420ToARGBMatrix(const uint8* src_y,
+static int I420ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
@@ -120,13 +120,13 @@ static int I420ToARGBMatrix(const uint8* src_y,
// Convert I420 to ARGB.
LIBYUV_API
-int I420ToARGB(const uint8* src_y,
+int I420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -137,13 +137,13 @@ int I420ToARGB(const uint8* src_y,
// Convert I420 to ABGR.
LIBYUV_API
-int I420ToABGR(const uint8* src_y,
+int I420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -156,13 +156,13 @@ int I420ToABGR(const uint8* src_y,
// Convert J420 to ARGB.
LIBYUV_API
-int J420ToARGB(const uint8* src_y,
+int J420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -173,13 +173,13 @@ int J420ToARGB(const uint8* src_y,
// Convert J420 to ABGR.
LIBYUV_API
-int J420ToABGR(const uint8* src_y,
+int J420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -192,13 +192,13 @@ int J420ToABGR(const uint8* src_y,
// Convert H420 to ARGB.
LIBYUV_API
-int H420ToARGB(const uint8* src_y,
+int H420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -209,13 +209,13 @@ int H420ToARGB(const uint8* src_y,
// Convert H420 to ABGR.
LIBYUV_API
-int H420ToABGR(const uint8* src_y,
+int H420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -227,20 +227,20 @@ int H420ToABGR(const uint8* src_y,
}
// Convert I422 to ARGB with matrix
-static int I422ToARGBMatrix(const uint8* src_y,
+static int I422ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
@@ -304,13 +304,13 @@ static int I422ToARGBMatrix(const uint8* src_y,
// Convert I422 to ARGB.
LIBYUV_API
-int I422ToARGB(const uint8* src_y,
+int I422ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -321,13 +321,13 @@ int I422ToARGB(const uint8* src_y,
// Convert I422 to ABGR.
LIBYUV_API
-int I422ToABGR(const uint8* src_y,
+int I422ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -340,13 +340,13 @@ int I422ToABGR(const uint8* src_y,
// Convert J422 to ARGB.
LIBYUV_API
-int J422ToARGB(const uint8* src_y,
+int J422ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -357,13 +357,13 @@ int J422ToARGB(const uint8* src_y,
// Convert J422 to ABGR.
LIBYUV_API
-int J422ToABGR(const uint8* src_y,
+int J422ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -376,13 +376,13 @@ int J422ToABGR(const uint8* src_y,
// Convert H422 to ARGB.
LIBYUV_API
-int H422ToARGB(const uint8* src_y,
+int H422ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -393,13 +393,13 @@ int H422ToARGB(const uint8* src_y,
// Convert H422 to ABGR.
LIBYUV_API
-int H422ToABGR(const uint8* src_y,
+int H422ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -413,24 +413,22 @@ int H422ToABGR(const uint8* src_y,
// Convert 10 bit YUV to AR30 with matrix
// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to
// multiply 10 bit yuv into high bits to allow any number of bits.
-static int H010ToAR30Matrix(const uint16* src_y,
+static int I010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf,
- const uint16* v_buf, uint8* rgb_buf,
+ void (*I210ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf,
+ const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
- I210ToARGBRow_C;
- void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
- ARGBToAR30Row_C;
+ I210ToAR30Row_C;
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -440,84 +438,117 @@ static int H010ToAR30Matrix(const uint16* src_y,
dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
dst_stride_ar30 = -dst_stride_ar30;
}
-#if defined(HAS_I210TOARGBROW_SSSE3)
+#if defined(HAS_I210TOAR30ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- I210ToARGBRow = I210ToARGBRow_Any_SSSE3;
+ I210ToAR30Row = I210ToAR30Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I210ToARGBRow = I210ToARGBRow_SSSE3;
+ I210ToAR30Row = I210ToAR30Row_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOAR30ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBToAR30Row = ARGBToAR30Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOAR30ROW_AVX2)
+#if defined(HAS_I210TOAR30ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToAR30Row = ARGBToAR30Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAR30Row = ARGBToAR30Row_AVX2;
+ I210ToAR30Row = I210ToAR30Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I210ToAR30Row = I210ToAR30Row_AVX2;
}
}
#endif
-
- {
- // Row buffers for 8 bit YUV and RGB.
- align_buffer_64(row_argb, width * 4);
-
- for (y = 0; y < height; ++y) {
- I210ToARGBRow(src_y, src_u, src_v, row_argb, yuvconstants, width);
- ARGBToAR30Row(row_argb, dst_ar30, width);
- dst_ar30 += dst_stride_ar30;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
+ for (y = 0; y < height; ++y) {
+ I210ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
+ dst_ar30 += dst_stride_ar30;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
}
-
- free_aligned_buffer_64(row_argb);
}
-
return 0;
}
+// Convert I010 to AR30.
+LIBYUV_API
+int I010ToAR30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_ar30, dst_stride_ar30,
+ &kYuvI601Constants, width, height);
+}
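
The refactor above replaces the old two-pass path (10-bit YUV to 8-bit ARGB rows, then ARGB to AR30) with a single I210ToAR30Row pass, which also stops truncating the extra two bits of chroma and luma precision. A minimal call sketch for the new I010ToAR30 entry point, assuming caller-allocated buffers; note that strides for the 10-bit planes count uint16_t elements, since the row loop above advances uint16_t pointers by the stride:

#include <stdint.h>
#include "libyuv/convert_argb.h"

// Hypothetical wrapper showing the call shape only.
int ConvertI010FrameToAR30(const uint16_t* y, const uint16_t* u,
                           const uint16_t* v, int width, int height,
                           uint8_t* ar30 /* width * height * 4 bytes */) {
  int half_width = (width + 1) / 2;
  // BT.601 matrix; H010ToAR30 is the BT.709 flavor of the same call.
  return I010ToAR30(y, width,         // Y plane, stride in elements.
                    u, half_width,    // U plane, 2x2 subsampled.
                    v, half_width,    // V plane.
                    ar30, width * 4,  // AR30 packs 4 bytes per pixel.
                    width, height);
}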
+
// Convert H010 to AR30.
LIBYUV_API
-int H010ToAR30(const uint16* src_y,
+int H010ToAR30(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height) {
- return H010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_ar30, dst_stride_ar30,
&kYuvH709Constants, width, height);
}
+// Convert I010 to AB30.
+LIBYUV_API
+int I010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height) {
+ return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u,
+ src_stride_u, dst_ab30, dst_stride_ab30,
+ &kYvuI601Constants, width, height);
+}
+
+// Convert H010 to AB30.
+LIBYUV_API
+int H010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height) {
+ return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u,
+ src_stride_u, dst_ab30, dst_stride_ab30,
+ &kYvuH709Constants, width, height);
+}
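
All four wrappers funnel into the same I010ToAR30Matrix; the AB30 variants simply swap the U and V pointers and pass the mirrored YVU constants, which trades the red and blue channel positions in the packed output word. The pairing, summarized:

// One matrix routine serves four entry points (grounded in the wrappers
// above); only the chroma order and the constants differ:
//   I010ToAR30: (u, v) + kYuvI601Constants    The AB30 variants put red,
//   I010ToAB30: (v, u) + kYvuI601Constants    rather than blue, in the
//   H010ToAR30: (u, v) + kYuvH709Constants    low 10 bits of the packed
//   H010ToAB30: (v, u) + kYvuH709Constants    little-endian word.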
+
// Convert 10 bit YUV to ARGB with matrix
-static int I010ToARGBMatrix(const uint16* src_y,
+static int I010ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf,
- const uint16* v_buf, uint8* rgb_buf,
+ void (*I210ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
+ const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
@@ -537,7 +568,14 @@ static int I010ToARGBMatrix(const uint16* src_y,
}
}
#endif
-
+#if defined(HAS_I210TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I210ToARGBRow = I210ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I210ToARGBRow = I210ToARGBRow_AVX2;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
@@ -552,13 +590,13 @@ static int I010ToARGBMatrix(const uint16* src_y,
// Convert I010 to ARGB.
LIBYUV_API
-int I010ToARGB(const uint16* src_y,
+int I010ToARGB(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -569,13 +607,13 @@ int I010ToARGB(const uint16* src_y,
// Convert I010 to ABGR.
LIBYUV_API
-int I010ToABGR(const uint16* src_y,
+int I010ToABGR(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -588,13 +626,13 @@ int I010ToABGR(const uint16* src_y,
// Convert H010 to ARGB.
LIBYUV_API
-int H010ToARGB(const uint16* src_y,
+int H010ToARGB(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -605,13 +643,13 @@ int H010ToARGB(const uint16* src_y,
// Convert H010 to ABGR.
LIBYUV_API
-int H010ToABGR(const uint16* src_y,
+int H010ToABGR(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -623,20 +661,20 @@ int H010ToABGR(const uint16* src_y,
}
// Convert I444 to ARGB with matrix
-static int I444ToARGBMatrix(const uint8* src_y,
+static int I444ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I444ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
@@ -700,13 +738,13 @@ static int I444ToARGBMatrix(const uint8* src_y,
// Convert I444 to ARGB.
LIBYUV_API
-int I444ToARGB(const uint8* src_y,
+int I444ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -717,13 +755,13 @@ int I444ToARGB(const uint8* src_y,
// Convert I444 to ABGR.
LIBYUV_API
-int I444ToABGR(const uint8* src_y,
+int I444ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -736,13 +774,13 @@ int I444ToABGR(const uint8* src_y,
// Convert J444 to ARGB.
LIBYUV_API
-int J444ToARGB(const uint8* src_y,
+int J444ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -752,28 +790,28 @@ int J444ToARGB(const uint8* src_y,
}
// Convert I420 with Alpha to preattenuated ARGB.
-static int I420AlphaToARGBMatrix(const uint8* src_y,
+static int I420AlphaToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- const uint8* src_a,
+ const uint8_t* src_a,
int src_stride_a,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate) {
int y;
- void (*I422AlphaToARGBRow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, const uint8* a_buf,
- uint8* dst_argb,
+ void (*I422AlphaToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) = I422AlphaToARGBRow_C;
- void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) =
- ARGBAttenuateRow_C;
+ void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ int width) = ARGBAttenuateRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -867,15 +905,15 @@ static int I420AlphaToARGBMatrix(const uint8* src_y,
// Convert I420 with Alpha to ARGB.
LIBYUV_API
-int I420AlphaToARGB(const uint8* src_y,
+int I420AlphaToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- const uint8* src_a,
+ const uint8_t* src_a,
int src_stride_a,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
@@ -888,15 +926,15 @@ int I420AlphaToARGB(const uint8* src_y,
// Convert I420 with Alpha to ABGR.
LIBYUV_API
-int I420AlphaToABGR(const uint8* src_y,
+int I420AlphaToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- const uint8* src_a,
+ const uint8_t* src_a,
int src_stride_a,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height,
@@ -910,14 +948,14 @@ int I420AlphaToABGR(const uint8* src_y,
// Convert I400 to ARGB.
LIBYUV_API
-int I400ToARGB(const uint8* src_y,
+int I400ToARGB(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*I400ToARGBRow)(const uint8* y_buf, uint8* rgb_buf, int width) =
+ void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, int width) =
I400ToARGBRow_C;
if (!src_y || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -977,14 +1015,14 @@ int I400ToARGB(const uint8* src_y,
// Convert J400 to ARGB.
LIBYUV_API
-int J400ToARGB(const uint8* src_y,
+int J400ToARGB(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
+ void (*J400ToARGBRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) =
J400ToARGBRow_C;
if (!src_y || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -1055,74 +1093,74 @@ static const uvec8 kShuffleMaskRGBAToARGB = {
// Convert BGRA to ARGB.
LIBYUV_API
-int BGRAToARGB(const uint8* src_bgra,
+int BGRAToARGB(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskBGRAToARGB), width, height);
+ (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
}
// Convert ARGB to BGRA (same as BGRAToARGB).
LIBYUV_API
-int ARGBToBGRA(const uint8* src_bgra,
+int ARGBToBGRA(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskBGRAToARGB), width, height);
+ (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
}
// Convert ABGR to ARGB.
LIBYUV_API
-int ABGRToARGB(const uint8* src_abgr,
+int ABGRToARGB(const uint8_t* src_abgr,
int src_stride_abgr,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskABGRToARGB), width, height);
+ (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
}
// Convert ARGB to ABGR (same as ABGRToARGB).
LIBYUV_API
-int ARGBToABGR(const uint8* src_abgr,
+int ARGBToABGR(const uint8_t* src_abgr,
int src_stride_abgr,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskABGRToARGB), width, height);
+ (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
}
// Convert RGBA to ARGB.
LIBYUV_API
-int RGBAToARGB(const uint8* src_rgba,
+int RGBAToARGB(const uint8_t* src_rgba,
int src_stride_rgba,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskRGBAToARGB), width, height);
+ (const uint8_t*)(&kShuffleMaskRGBAToARGB), width, height);
}
// Convert RGB24 to ARGB.
LIBYUV_API
-int RGB24ToARGB(const uint8* src_rgb24,
+int RGB24ToARGB(const uint8_t* src_rgb24,
int src_stride_rgb24,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
+ void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C;
if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -1174,14 +1212,14 @@ int RGB24ToARGB(const uint8* src_rgb24,
// Convert RAW to ARGB.
LIBYUV_API
-int RAWToARGB(const uint8* src_raw,
+int RAWToARGB(const uint8_t* src_raw,
int src_stride_raw,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
+ void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
if (!src_raw || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -1233,15 +1271,15 @@ int RAWToARGB(const uint8* src_raw,
// Convert RGB565 to ARGB.
LIBYUV_API
-int RGB565ToARGB(const uint8* src_rgb565,
+int RGB565ToARGB(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
- RGB565ToARGBRow_C;
+ void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb,
+ int width) = RGB565ToARGBRow_C;
if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1300,14 +1338,14 @@ int RGB565ToARGB(const uint8* src_rgb565,
// Convert ARGB1555 to ARGB.
LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_argb1555,
+int ARGB1555ToARGB(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
+ void (*ARGB1555ToARGBRow)(const uint8_t* src_argb1555, uint8_t* dst_argb,
int width) = ARGB1555ToARGBRow_C;
if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -1367,14 +1405,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
// Convert ARGB4444 to ARGB.
LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_argb4444,
+int ARGB4444ToARGB(const uint8_t* src_argb4444,
int src_stride_argb4444,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
+ void (*ARGB4444ToARGBRow)(const uint8_t* src_argb4444, uint8_t* dst_argb,
int width) = ARGB4444ToARGBRow_C;
if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -1434,15 +1472,13 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
// Convert AR30 to ARGB.
LIBYUV_API
-int AR30ToARGB(const uint8* src_ar30,
+int AR30ToARGB(const uint8_t* src_ar30,
int src_stride_ar30,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*AR30ToARGBRow)(const uint8* src_ar30, uint8* dst_argb, int width) =
- AR30ToARGBRow_C;
if (!src_ar30 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1459,27 +1495,59 @@ int AR30ToARGB(const uint8* src_ar30,
src_stride_ar30 = dst_stride_argb = 0;
}
for (y = 0; y < height; ++y) {
- AR30ToARGBRow(src_ar30, dst_argb, width);
+ AR30ToARGBRow_C(src_ar30, dst_argb, width);
src_ar30 += src_stride_ar30;
dst_argb += dst_stride_argb;
}
return 0;
}
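
AR30ToARGB above now calls the C row function directly, since no SIMD variant exists yet for this path and the function-pointer indirection bought nothing. For reference, a scalar sketch of the per-pixel unpack it performs, assuming little-endian AR30 words (2-bit alpha in the top bits, 10 bits each of R, G, B with blue lowest) and libyuv's B, G, R, A byte order for ARGB:

#include <stdint.h>

// Scalar sketch of the AR30-to-ARGB unpack; no clamping is needed
// because each field is masked to its width.
static void UnpackAR30Pixel(uint32_t ar30, uint8_t argb[4]) {
  uint32_t b = (ar30 >> 0) & 0x3ff;   // Blue in the low 10 bits.
  uint32_t g = (ar30 >> 10) & 0x3ff;
  uint32_t r = (ar30 >> 20) & 0x3ff;
  uint32_t a = (ar30 >> 30) & 0x3;    // 2-bit alpha.
  argb[0] = (uint8_t)(b >> 2);        // Drop 2 bits: 10 -> 8.
  argb[1] = (uint8_t)(g >> 2);
  argb[2] = (uint8_t)(r >> 2);
  argb[3] = (uint8_t)(a * 0x55);      // 0..3 -> 0, 85, 170, 255.
}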
+// Convert AR30 to ABGR.
+LIBYUV_API
+int AR30ToABGR(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ int y;
+ if (!src_ar30 || !dst_abgr || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ar30 = src_ar30 + (height - 1) * src_stride_ar30;
+ src_stride_ar30 = -src_stride_ar30;
+ }
+ // Coalesce rows.
+ if (src_stride_ar30 == width * 4 && dst_stride_abgr == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_ar30 = dst_stride_abgr = 0;
+ }
+ for (y = 0; y < height; ++y) {
+ AR30ToABGRRow_C(src_ar30, dst_abgr, width);
+ src_ar30 += src_stride_ar30;
+ dst_abgr += dst_stride_abgr;
+ }
+ return 0;
+}
+
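
Both AR30 functions use the row-coalescing idiom seen throughout these converters: when both strides equal the row width in bytes, the image is contiguous in memory and can be processed as one long row, amortizing per-row overhead. The idiom, extracted into a hypothetical helper:

#include <assert.h>

// Hypothetical helper capturing the inline idiom; bpp is bytes per
// pixel (4 for AR30/ARGB/ABGR). After coalescing, strides never advance.
static void CoalesceRows(int* width, int* height, int* src_stride,
                         int* dst_stride, int bpp) {
  assert(*height > 0);  // Callers flip negative heights first.
  if (*src_stride == *width * bpp && *dst_stride == *width * bpp) {
    *width *= *height;  // One long row of width * height pixels.
    *height = 1;
    *src_stride = *dst_stride = 0;
  }
}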
// Convert NV12 to ARGB with matrix
-static int NV12ToARGBMatrix(const uint8* src_y,
+static int NV12ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf,
- const struct YuvConstants* yuvconstants, int width) =
- NV12ToARGBRow_C;
+ void (*NV12ToARGBRow)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1534,20 +1602,20 @@ static int NV12ToARGBMatrix(const uint8* src_y,
}
// Convert NV21 to ARGB with matrix
-static int NV21ToARGBMatrix(const uint8* src_y,
+static int NV21ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
- int src_stride_uv,
- uint8* dst_argb,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*NV21ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf,
- const struct YuvConstants* yuvconstants, int width) =
- NV21ToARGBRow_C;
- if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
+ void (*NV21ToARGBRow)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C;
+ if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -1590,11 +1658,11 @@ static int NV21ToARGBMatrix(const uint8* src_y,
#endif
for (y = 0; y < height; ++y) {
- NV21ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
+ NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
- src_uv += src_stride_uv;
+ src_vu += src_stride_vu;
}
}
return 0;
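
The rename from src_uv/src_stride_uv to src_vu/src_stride_vu is purely documentary: NV21 stores V before U in its interleaved chroma plane, and the old name misstated the byte order. For chroma sample n:

// Interleaved chroma byte order for the two biplanar formats (sketch,
// not libyuv code):
//   NV12 plane: ... U(n) V(n) U(n+1) V(n+1) ...   U first -> src_uv
//   NV21 plane: ... V(n) U(n) V(n+1) U(n+1) ...   V first -> src_vu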
@@ -1602,11 +1670,11 @@ static int NV21ToARGBMatrix(const uint8* src_y,
// Convert NV12 to ARGB.
LIBYUV_API
-int NV12ToARGB(const uint8* src_y,
+int NV12ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -1616,26 +1684,26 @@ int NV12ToARGB(const uint8* src_y,
// Convert NV21 to ARGB.
LIBYUV_API
-int NV21ToARGB(const uint8* src_y,
+int NV21ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
- int src_stride_uv,
- uint8* dst_argb,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
- return NV21ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb,
+ return NV21ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_argb,
dst_stride_argb, &kYuvI601Constants, width, height);
}
// Convert NV12 to ABGR.
// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix.
// To swap the UV use NV12 instead of NV21.
LIBYUV_API
-int NV12ToABGR(const uint8* src_y,
+int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -1645,11 +1713,11 @@ int NV12ToABGR(const uint8* src_y,
// Convert NV21 to ABGR.
LIBYUV_API
-int NV21ToABGR(const uint8* src_y,
+int NV21ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_vu,
+ const uint8_t* src_vu,
int src_stride_vu,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
@@ -1659,16 +1727,16 @@ int NV21ToABGR(const uint8* src_y,
// Convert M420 to ARGB.
LIBYUV_API
-int M420ToARGB(const uint8* src_m420,
+int M420ToARGB(const uint8_t* src_m420,
int src_stride_m420,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf,
- const struct YuvConstants* yuvconstants, int width) =
- NV12ToARGBRow_C;
+ void (*NV12ToARGBRow)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
if (!src_m420 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1728,14 +1796,14 @@ int M420ToARGB(const uint8* src_m420,
// Convert YUY2 to ARGB.
LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2,
+int YUY2ToARGB(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb,
+ void (*YUY2ToARGBRow)(const uint8_t* src_yuy2, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, int width) =
YUY2ToARGBRow_C;
if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) {
@@ -1795,14 +1863,14 @@ int YUY2ToARGB(const uint8* src_yuy2,
// Convert UYVY to ARGB.
LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy,
+int UYVYToARGB(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb,
+ void (*UYVYToARGBRow)(const uint8_t* src_uyvy, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, int width) =
UYVYToARGBRow_C;
if (!src_uyvy || !dst_argb || width <= 0 || height == 0) {
@@ -1859,10 +1927,10 @@ int UYVYToARGB(const uint8* src_uyvy,
}
return 0;
}
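
WeavePixels below (its body is truncated by the hunk) re-interleaves separately addressed U and V samples, each possibly strided by src_pixel_stride_uv in the Android flexible layout, into a packed UV plane for the NV12 converter. A completed sketch under the same signature:

#include <stdint.h>

// Completed sketch of the truncated loop.
static void WeavePixelsSketch(const uint8_t* src_u, const uint8_t* src_v,
                              int src_pixel_stride_uv, uint8_t* dst_uv,
                              int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[0] = *src_u;  // U first: the output is NV12-ordered.
    dst_uv[1] = *src_v;
    dst_uv += 2;
    src_u += src_pixel_stride_uv;  // Stride may exceed 2 on Android.
    src_v += src_pixel_stride_uv;
  }
}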
-static void WeavePixels(const uint8* src_u,
- const uint8* src_v,
+static void WeavePixels(const uint8_t* src_u,
+ const uint8_t* src_v,
int src_pixel_stride_uv,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -1876,20 +1944,20 @@ static void WeavePixels(const uint8* src_u,
// Convert Android420 to ARGB.
LIBYUV_API
-int Android420ToARGBMatrix(const uint8* src_y,
+int Android420ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- uint8* dst_uv;
+ uint8_t* dst_uv;
const ptrdiff_t vu_off = src_v - src_u;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
@@ -1910,13 +1978,14 @@ int Android420ToARGBMatrix(const uint8* src_y,
src_stride_v, dst_argb, dst_stride_argb,
yuvconstants, width, height);
// NV21
- } else if (src_pixel_stride_uv == 2 && vu_off == -1 &&
- src_stride_u == src_stride_v) {
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == -1 &&
+ src_stride_u == src_stride_v) {
return NV21ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, dst_argb,
dst_stride_argb, yuvconstants, width, height);
// NV12
- } else if (src_pixel_stride_uv == 2 && vu_off == 1 &&
- src_stride_u == src_stride_v) {
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) {
return NV12ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, dst_argb,
dst_stride_argb, yuvconstants, width, height);
}
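
Android420ToARGBMatrix mirrors the dispatch in Android420ToI420: recognize the planar and biplanar special cases, and only fall back to per-pixel weaving for exotic pixel strides. In outline (the fourth step is a reading of the code continuing below this hunk):

// Dispatch order in Android420ToARGBMatrix:
//   1. src_pixel_stride_uv == 1        -> I420ToARGBMatrix (planar)
//   2. stride 2, src_v == src_u - 1    -> NV21ToARGBMatrix
//   3. stride 2, src_v == src_u + 1    -> NV12ToARGBMatrix
//   4. anything else                   -> weave chroma into a scratch
//                                         UV plane, then convert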
@@ -1938,14 +2007,14 @@ int Android420ToARGBMatrix(const uint8* src_y,
// Convert Android420 to ARGB.
LIBYUV_API
-int Android420ToARGB(const uint8* src_y,
+int Android420ToARGB(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
@@ -1957,14 +2026,14 @@ int Android420ToARGB(const uint8* src_y,
// Convert Android420 to ABGR.
LIBYUV_API
-int Android420ToABGR(const uint8* src_y,
+int Android420ToABGR(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
- uint8* dst_abgr,
+ uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height) {
diff --git a/chromium/third_party/libyuv/source/convert_from.cc b/chromium/third_party/libyuv/source/convert_from.cc
index 9da607102f6..b5587ced625 100644
--- a/chromium/third_party/libyuv/source/convert_from.cc
+++ b/chromium/third_party/libyuv/source/convert_from.cc
@@ -30,17 +30,17 @@ static __inline int Abs(int v) {
}
// I420 To any I4xx YUV format with mirroring.
-static int I420ToI4xx(const uint8* src_y,
+static int I420ToI4xx(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int src_y_width,
int src_y_height,
@@ -67,17 +67,17 @@ static int I420ToI4xx(const uint8* src_y,
// Convert 8 bit YUV to 10 bit.
LIBYUV_API
-int I420ToI010(const uint8* src_y,
+int I420ToI010(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
- uint16* dst_u,
+ uint16_t* dst_u,
int dst_stride_u,
- uint16* dst_v,
+ uint16_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -112,17 +112,17 @@ int I420ToI010(const uint8* src_y,
// 420 chroma is 1/2 width, 1/2 height
// 422 chroma is 1/2 width, 1x height
LIBYUV_API
-int I420ToI422(const uint8* src_y,
+int I420ToI422(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -137,17 +137,17 @@ int I420ToI422(const uint8* src_y,
// 420 chroma is 1/2 width, 1/2 height
// 444 chroma is 1x width, 1x height
LIBYUV_API
-int I420ToI444(const uint8* src_y,
+int I420ToI444(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -161,9 +161,9 @@ int I420ToI444(const uint8* src_y,
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
LIBYUV_API
-int I400Copy(const uint8* src_y,
+int I400Copy(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
@@ -181,19 +181,19 @@ int I400Copy(const uint8* src_y,
}
LIBYUV_API
-int I422ToYUY2(const uint8* src_y,
+int I422ToYUY2(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_yuy2,
+ uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height) {
int y;
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
+ void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
I422ToYUY2Row_C;
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
return -1;
@@ -219,6 +219,14 @@ int I422ToYUY2(const uint8* src_y,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_AVX2;
+ }
+ }
+#endif
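
The AVX2 blocks added here follow libyuv's standard runtime-dispatch idiom: start from the portable C row function, upgrade to the _Any_ SIMD variant (which handles a ragged tail in C) when the CPU flag is present, and upgrade again to the fully vectorized variant when the width meets the alignment requirement. In general form, with hypothetical row-function names:

// General form of the idiom; RowFunc_* names are placeholders.
void (*RowFunc)(const uint8_t* src, uint8_t* dst, int width) = RowFunc_C;
#if defined(HAS_ROWFUNC_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
  RowFunc = RowFunc_Any_AVX2;  // Any width: SIMD body plus C tail.
  if (IS_ALIGNED(width, 32)) {
    RowFunc = RowFunc_AVX2;    // Fastest path: width % 32 == 0.
  }
}
#endif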
#if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
@@ -239,19 +247,19 @@ int I422ToYUY2(const uint8* src_y,
}
LIBYUV_API
-int I420ToYUY2(const uint8* src_y,
+int I420ToYUY2(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_yuy2,
+ uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height) {
int y;
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
+ void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
I422ToYUY2Row_C;
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
return -1;
@@ -270,6 +278,14 @@ int I420ToYUY2(const uint8* src_y,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_AVX2;
+ }
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
@@ -303,19 +319,19 @@ int I420ToYUY2(const uint8* src_y,
}
LIBYUV_API
-int I422ToUYVY(const uint8* src_y,
+int I422ToUYVY(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_uyvy,
+ uint8_t* dst_uyvy,
int dst_stride_uyvy,
int width,
int height) {
int y;
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) =
+ void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
I422ToUYVYRow_C;
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
return -1;
@@ -341,6 +357,14 @@ int I422ToUYVY(const uint8* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
@@ -369,19 +393,19 @@ int I422ToUYVY(const uint8* src_y,
}
LIBYUV_API
-int I420ToUYVY(const uint8* src_y,
+int I420ToUYVY(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_uyvy,
+ uint8_t* dst_uyvy,
int dst_stride_uyvy,
int width,
int height) {
int y;
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) =
+ void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
I422ToUYVYRow_C;
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
return -1;
@@ -400,6 +424,14 @@ int I420ToUYVY(const uint8* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
@@ -434,15 +466,15 @@ int I420ToUYVY(const uint8* src_y,
// TODO(fbarchard): test negative height for invert.
LIBYUV_API
-int I420ToNV12(const uint8* src_y,
+int I420ToNV12(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
@@ -461,15 +493,15 @@ int I420ToNV12(const uint8* src_y,
}
LIBYUV_API
-int I420ToNV21(const uint8* src_y,
+int I420ToNV21(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_vu,
+ uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
@@ -479,20 +511,20 @@ int I420ToNV21(const uint8* src_y,
}
// Convert I420 to RGBA with matrix
-static int I420ToRGBAMatrix(const uint8* src_y,
+static int I420ToRGBAMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
@@ -551,13 +583,13 @@ static int I420ToRGBAMatrix(const uint8* src_y,
// Convert I420 to RGBA.
LIBYUV_API
-int I420ToRGBA(const uint8* src_y,
+int I420ToRGBA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height) {
@@ -568,13 +600,13 @@ int I420ToRGBA(const uint8* src_y,
// Convert I420 to BGRA.
LIBYUV_API
-int I420ToBGRA(const uint8* src_y,
+int I420ToBGRA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_bgra,
+ uint8_t* dst_bgra,
int dst_stride_bgra,
int width,
int height) {
@@ -586,20 +618,20 @@ int I420ToBGRA(const uint8* src_y,
}
// Convert I420 to RGB24 with matrix
-static int I420ToRGB24Matrix(const uint8* src_y,
+static int I420ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgb24,
+ uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I422ToRGB24Row)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB24Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
@@ -658,13 +690,13 @@ static int I420ToRGB24Matrix(const uint8* src_y,
// Convert I420 to RGB24.
LIBYUV_API
-int I420ToRGB24(const uint8* src_y,
+int I420ToRGB24(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgb24,
+ uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height) {
@@ -675,13 +707,13 @@ int I420ToRGB24(const uint8* src_y,
// Convert I420 to RAW.
LIBYUV_API
-int I420ToRAW(const uint8* src_y,
+int I420ToRAW(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_raw,
+ uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height) {
@@ -694,13 +726,13 @@ int I420ToRAW(const uint8* src_y,
// Convert H420 to RGB24.
LIBYUV_API
-int H420ToRGB24(const uint8* src_y,
+int H420ToRGB24(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgb24,
+ uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height) {
@@ -711,13 +743,13 @@ int H420ToRGB24(const uint8* src_y,
// Convert H420 to RAW.
LIBYUV_API
-int H420ToRAW(const uint8* src_y,
+int H420ToRAW(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_raw,
+ uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height) {
@@ -730,19 +762,19 @@ int H420ToRAW(const uint8* src_y,
// Convert I420 to ARGB1555.
LIBYUV_API
-int I420ToARGB1555(const uint8* src_y,
+int I420ToARGB1555(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb1555,
+ uint8_t* dst_argb1555,
int dst_stride_argb1555,
int width,
int height) {
int y;
- void (*I422ToARGB1555Row)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToARGB1555Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB1555Row_C;
if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 ||
@@ -803,19 +835,19 @@ int I420ToARGB1555(const uint8* src_y,
// Convert I420 to ARGB4444.
LIBYUV_API
-int I420ToARGB4444(const uint8* src_y,
+int I420ToARGB4444(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_argb4444,
+ uint8_t* dst_argb4444,
int dst_stride_argb4444,
int width,
int height) {
int y;
- void (*I422ToARGB4444Row)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToARGB4444Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB4444Row_C;
if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 ||
@@ -876,19 +908,19 @@ int I420ToARGB4444(const uint8* src_y,
// Convert I420 to RGB565.
LIBYUV_API
-int I420ToRGB565(const uint8* src_y,
+int I420ToRGB565(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height) {
int y;
- void (*I422ToRGB565Row)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
@@ -947,19 +979,19 @@ int I420ToRGB565(const uint8* src_y,
// Convert I422 to RGB565.
LIBYUV_API
-int I422ToRGB565(const uint8* src_y,
+int I422ToRGB565(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height) {
int y;
- void (*I422ToRGB565Row)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
@@ -1015,30 +1047,30 @@ int I422ToRGB565(const uint8* src_y,
}
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
-static const uint8 kDither565_4x4[16] = {
+static const uint8_t kDither565_4x4[16] = {
0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
};
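
kDither565_4x4 is a 4x4 ordered-dither table, one byte per pixel position. I420ToRGB565Dither below selects a row of four values per scanline via (y & 3) << 2, passes them packed as one little-endian uint32_t, and the row function indexes them by x & 3, so the pattern tiles every 4x4 pixels. A scalar sketch of the dithered 888-to-565 rounding, in the spirit of ARGBToRGB565DitherRow_C:

#include <stdint.h>

static int Clamp255(int v) {
  return v > 255 ? 255 : v;
}

// dither is kDither565_4x4[((y & 3) << 2) | (x & 3)], a value 0..7.
static uint16_t DitherPixel565(uint8_t b, uint8_t g, uint8_t r,
                               uint8_t dither) {
  int b5 = Clamp255(b + dither) >> 3;  // 8 -> 5 bits.
  int g6 = Clamp255(g + dither) >> 2;  // 8 -> 6 bits.
  int r5 = Clamp255(r + dither) >> 3;  // 8 -> 5 bits.
  return (uint16_t)(b5 | (g6 << 5) | (r5 << 11));
}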
// Convert I420 to RGB565 with dithering.
LIBYUV_API
-int I420ToRGB565Dither(const uint8* src_y,
+int I420ToRGB565Dither(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
- const uint8* dither4x4,
+ const uint8_t* dither4x4,
int width,
int height) {
int y;
- void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
- void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int width) =
+ void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ const uint32_t dither4, int width) =
ARGBToRGB565DitherRow_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -1122,8 +1154,8 @@ int I420ToRGB565Dither(const uint8* src_y,
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
- *(uint32*)(dither4x4 + ((y & 3) << 2)), // NOLINT
- width); // NOLINT
+ *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT
+ width); // NOLINT
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@@ -1137,24 +1169,22 @@ int I420ToRGB565Dither(const uint8* src_y,
}
// Convert I420 to AR30 with matrix
-static int I420ToAR30Matrix(const uint8* src_y,
+static int I420ToAR30Matrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToAR30Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
- I422ToARGBRow_C;
- void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
- ARGBToAR30Row_C;
+ I422ToAR30Row_C;
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
@@ -1166,84 +1196,44 @@ static int I420ToAR30Matrix(const uint8* src_y,
dst_stride_ar30 = -dst_stride_ar30;
}
-#if defined(HAS_ARGBTOAR30ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBToAR30Row = ARGBToAR30Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOAR30ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToAR30Row = ARGBToAR30Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAR30Row = ARGBToAR30Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOAR30ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+ I422ToAR30Row = I422ToAR30Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
+ I422ToAR30Row = I422ToAR30Row_SSSE3;
}
}
#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
+#if defined(HAS_I422TOAR30ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+ I422ToAR30Row = I422ToAR30Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MSA)
- if (TestCpuFlag(kCpuHasMSA)) {
- I422ToARGBRow = I422ToARGBRow_Any_MSA;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_MSA;
+ I422ToAR30Row = I422ToAR30Row_AVX2;
}
}
#endif
- {
- // Row buffer for ARGB.
- align_buffer_64(row_argb, width * 4);
-
- for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, row_argb, yuvconstants, width);
- ARGBToAR30Row(row_argb, dst_ar30, width);
- dst_ar30 += dst_stride_ar30;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
+ for (y = 0; y < height; ++y) {
+ I422ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
+ dst_ar30 += dst_stride_ar30;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
}
-
- free_aligned_buffer_64(row_argb);
}
return 0;
}
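// Why the rewrite above matters (a sketch, not the actual row code): the old
// two-pass path rounded to 8-bit ARGB first, so each 10-bit output channel
// could only ever be the replicated form of an 8-bit value:
static inline uint16_t Replicate8To10(uint8_t v) {
  return (uint16_t)(((uint16_t)v << 2) | (v >> 6));  // 0..255 -> 0..1023
}
// A fused I422ToAR30Row can round the matrix result at 10 bits directly, and
// it also drops the per-frame row_argb allocation.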
// Convert I420 to AR30.
LIBYUV_API
-int I420ToAR30(const uint8* src_y,
+int I420ToAR30(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height) {
@@ -1252,20 +1242,37 @@ int I420ToAR30(const uint8* src_y,
&kYuvI601Constants, width, height);
}
+// Convert H420 to AR30.
+LIBYUV_API
+int H420ToAR30(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_ar30, dst_stride_ar30,
+ &kYvuH709Constants, width, height);
+}
+
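// Usage sketch (hypothetical buffers): AR30 is 4 bytes per pixel, so a tight
// destination stride is width * 4, exactly as for ARGB.
static int H420FrameToAr30(const uint8_t* y, const uint8_t* u,
                           const uint8_t* v, uint8_t* ar30, int w, int h) {
  int halfw = (w + 1) / 2;
  return H420ToAR30(y, w, u, halfw, v, halfw, ar30, w * 4, w, h);
}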
// Convert I420 to specified format
LIBYUV_API
-int ConvertFromI420(const uint8* y,
+int ConvertFromI420(const uint8_t* y,
int y_stride,
- const uint8* u,
+ const uint8_t* u,
int u_stride,
- const uint8* v,
+ const uint8_t* v,
int v_stride,
- uint8* dst_sample,
+ uint8_t* dst_sample,
int dst_sample_stride,
int width,
int height,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
+ uint32_t fourcc) {
+ uint32_t format = CanonicalFourCC(fourcc);
int r = 0;
if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) {
return -1;
@@ -1338,7 +1345,7 @@ int ConvertFromI420(const uint8* y,
height);
break;
case FOURCC_NV12: {
- uint8* dst_uv = dst_sample + width * height;
+ uint8_t* dst_uv = dst_sample + width * height;
r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width, dst_uv,
dst_sample_stride ? dst_sample_stride : width, width,
@@ -1346,7 +1353,7 @@ int ConvertFromI420(const uint8* y,
break;
}
case FOURCC_NV21: {
- uint8* dst_vu = dst_sample + width * height;
+ uint8_t* dst_vu = dst_sample + width * height;
r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width, dst_vu,
dst_sample_stride ? dst_sample_stride : width, width,
@@ -1360,8 +1367,8 @@ int ConvertFromI420(const uint8* y,
dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
int halfstride = (dst_sample_stride + 1) / 2;
int halfheight = (height + 1) / 2;
- uint8* dst_u;
- uint8* dst_v;
+ uint8_t* dst_u;
+ uint8_t* dst_v;
if (format == FOURCC_YV12) {
dst_v = dst_sample + dst_sample_stride * height;
dst_u = dst_v + halfstride * halfheight;
@@ -1378,8 +1385,8 @@ int ConvertFromI420(const uint8* y,
case FOURCC_YV16: {
dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
int halfstride = (dst_sample_stride + 1) / 2;
- uint8* dst_u;
- uint8* dst_v;
+ uint8_t* dst_u;
+ uint8_t* dst_v;
if (format == FOURCC_YV16) {
dst_v = dst_sample + dst_sample_stride * height;
dst_u = dst_v + halfstride * height;
@@ -1395,8 +1402,8 @@ int ConvertFromI420(const uint8* y,
case FOURCC_I444:
case FOURCC_YV24: {
dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
- uint8* dst_u;
- uint8* dst_v;
+ uint8_t* dst_u;
+ uint8_t* dst_v;
if (format == FOURCC_YV24) {
dst_v = dst_sample + dst_sample_stride * height;
dst_u = dst_v + dst_sample_stride * height;
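// Sketch of the packed destination layout the triplanar cases above assume
// when dst_sample_stride is 0 (the stride then defaults to width):
static size_t I420SampleSize(int width, int height) {
  size_t halfstride = (size_t)((width + 1) / 2);
  size_t halfheight = (size_t)((height + 1) / 2);
  return (size_t)width * height + 2 * halfstride * halfheight;
}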
diff --git a/chromium/third_party/libyuv/source/convert_from_argb.cc b/chromium/third_party/libyuv/source/convert_from_argb.cc
index 02e12a12804..16b838458f0 100644
--- a/chromium/third_party/libyuv/source/convert_from_argb.cc
+++ b/chromium/third_party/libyuv/source/convert_from_argb.cc
@@ -22,21 +22,21 @@ extern "C" {
// ARGB little endian (bgra in memory) to I444
LIBYUV_API
-int ARGBToI444(const uint8* src_argb,
+int ARGBToI444(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
- void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int width) = ARGBToUV444Row_C;
+ void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = ARGBToUV444Row_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
@@ -122,20 +122,21 @@ int ARGBToI444(const uint8* src_argb,
// ARGB little endian (bgra in memory) to I422
LIBYUV_API
-int ARGBToI422(const uint8* src_argb,
+int ARGBToI422(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -219,22 +220,23 @@ int ARGBToI422(const uint8* src_argb,
}
LIBYUV_API
-int ARGBToNV12(const uint8* src_argb,
+int ARGBToNV12(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
- void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
+ void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+ uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
@@ -331,7 +333,7 @@ int ARGBToNV12(const uint8* src_argb,
{
     // Allocate 2 rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
- uint8* row_v = row_u + ((halfwidth + 31) & ~31);
+ uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
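// Sketch of the buffer layout above (the padding rationale is an assumption):
// each half-width chroma row is rounded up to a multiple of 32 bytes, which
// keeps row_v aligned within the single allocation and leaves tail slack for
// the _Any_ SIMD row variants.
static inline int PaddedHalfWidth(int width) {
  int halfwidth = (width + 1) >> 1;
  return (halfwidth + 31) & ~31;
}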
@@ -354,23 +356,24 @@ int ARGBToNV12(const uint8* src_argb,
// Same as NV12 but U and V swapped.
LIBYUV_API
-int ARGBToNV21(const uint8* src_argb,
+int ARGBToNV21(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
- int dst_stride_uv,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
- void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
- if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
+ void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+ uint8_t* dst_vu, int width) = MergeUVRow_C;
+ if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -466,20 +469,20 @@ int ARGBToNV21(const uint8* src_argb,
{
     // Allocate 2 rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
- uint8* row_v = row_u + ((halfwidth + 31) & ~31);
+ uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+ MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
+ dst_vu += dst_stride_vu;
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+ MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
}
free_aligned_buffer_64(row_u);
@@ -489,19 +492,20 @@ int ARGBToNV21(const uint8* src_argb,
// Convert ARGB to YUY2.
LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb,
+int ARGBToYUY2(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yuy2,
+ uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height) {
int y;
- void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
+ void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
I422ToYUY2Row_C;
if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) {
@@ -579,6 +583,14 @@ int ARGBToYUY2(const uint8* src_argb,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_AVX2;
+ }
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
@@ -599,8 +611,8 @@ int ARGBToYUY2(const uint8* src_argb,
{
     // Allocate rows of y, u and v.
align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+ uint8_t* row_u = row_y + ((width + 63) & ~63);
+ uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
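// Sketch of the YUY2 byte order I422ToYUY2Row is expected to emit, taken from
// the FOURCC definition: two horizontal pixels share one U and one V sample.
static inline void PackYUY2Pair(uint8_t y0, uint8_t y1, uint8_t u, uint8_t v,
                                uint8_t* dst_yuy2) {
  dst_yuy2[0] = y0;
  dst_yuy2[1] = u;
  dst_yuy2[2] = y1;
  dst_yuy2[3] = v;
}
// UYVY (the next function) is the same idea with byte order U, Y0, V, Y1.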
@@ -617,19 +629,20 @@ int ARGBToYUY2(const uint8* src_argb,
// Convert ARGB to UYVY.
LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb,
+int ARGBToUYVY(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_uyvy,
+ uint8_t* dst_uyvy,
int dst_stride_uyvy,
int width,
int height) {
int y;
- void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, uint8* dst_u,
- uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) =
+ void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
I422ToUYVYRow_C;
if (!src_argb || !dst_uyvy || width <= 0 || height == 0) {
@@ -707,6 +720,14 @@ int ARGBToUYVY(const uint8* src_argb,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
@@ -727,8 +748,8 @@ int ARGBToUYVY(const uint8* src_argb,
{
     // Allocate rows of y, u and v.
align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+ uint8_t* row_u = row_y + ((width + 63) & ~63);
+ uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
@@ -745,14 +766,14 @@ int ARGBToUYVY(const uint8* src_argb,
// Convert ARGB to I400.
LIBYUV_API
-int ARGBToI400(const uint8* src_argb,
+int ARGBToI400(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
int y;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || width <= 0 || height == 0) {
return -1;
@@ -815,26 +836,26 @@ static const uvec8 kShuffleMaskARGBToRGBA = {
// Convert ARGB to RGBA.
LIBYUV_API
-int ARGBToRGBA(const uint8* src_argb,
+int ARGBToRGBA(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height) {
return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
- (const uint8*)(&kShuffleMaskARGBToRGBA), width, height);
+ (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height);
}
// Convert ARGB To RGB24.
LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb,
+int ARGBToRGB24(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgb24,
+ uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height) {
int y;
- void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
+ void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
ARGBToRGB24Row_C;
if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
@@ -885,14 +906,14 @@ int ARGBToRGB24(const uint8* src_argb,
// Convert ARGB To RAW.
LIBYUV_API
-int ARGBToRAW(const uint8* src_argb,
+int ARGBToRAW(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_raw,
+ uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height) {
int y;
- void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) =
+ void (*ARGBToRAWRow)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
ARGBToRAWRow_C;
if (!src_argb || !dst_raw || width <= 0 || height == 0) {
return -1;
@@ -942,22 +963,22 @@ int ARGBToRAW(const uint8* src_argb,
}
 // Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
-static const uint8 kDither565_4x4[16] = {
+static const uint8_t kDither565_4x4[16] = {
0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
};
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
LIBYUV_API
-int ARGBToRGB565Dither(const uint8* src_argb,
+int ARGBToRGB565Dither(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
- const uint8* dither4x4,
+ const uint8_t* dither4x4,
int width,
int height) {
int y;
- void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int width) =
+ void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ const uint32_t dither4, int width) =
ARGBToRGB565DitherRow_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -1005,7 +1026,7 @@ int ARGBToRGB565Dither(const uint8* src_argb,
for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
- *(uint32*)(dither4x4 + ((y & 3) << 2)),
+ *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT
width); /* NOLINT */
src_argb += src_stride_argb;
dst_rgb565 += dst_stride_rgb565;
@@ -1016,15 +1037,15 @@ int ARGBToRGB565Dither(const uint8* src_argb,
// Convert ARGB To RGB565.
// TODO(fbarchard): Consider using dither function low level with zeros.
LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb,
+int ARGBToRGB565(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height) {
int y;
- void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
- ARGBToRGB565Row_C;
+ void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ int width) = ARGBToRGB565Row_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
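// Sketch of the 565 packing (assumed from the format, not copied from the row
// code): each pixel keeps the top 5/6/5 bits of B/G/R.
static inline uint16_t PackRGB565(uint8_t b, uint8_t g, uint8_t r) {
  return (uint16_t)((b >> 3) | ((uint16_t)(g >> 2) << 5) |
                    ((uint16_t)(r >> 3) << 11));
}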
@@ -1082,15 +1103,15 @@ int ARGBToRGB565(const uint8* src_argb,
// Convert ARGB To ARGB1555.
LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb,
+int ARGBToARGB1555(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb1555,
+ uint8_t* dst_argb1555,
int dst_stride_argb1555,
int width,
int height) {
int y;
- void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
- ARGBToARGB1555Row_C;
+ void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ int width) = ARGBToARGB1555Row_C;
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1;
}
@@ -1148,15 +1169,15 @@ int ARGBToARGB1555(const uint8* src_argb,
// Convert ARGB To ARGB4444.
LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb,
+int ARGBToARGB4444(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb4444,
+ uint8_t* dst_argb4444,
int dst_stride_argb4444,
int width,
int height) {
int y;
- void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
- ARGBToARGB4444Row_C;
+ void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ int width) = ARGBToARGB4444Row_C;
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1;
}
@@ -1212,16 +1233,65 @@ int ARGBToARGB4444(const uint8* src_argb,
return 0;
}
+// Convert ABGR To AR30.
+LIBYUV_API
+int ABGRToAR30(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ int y;
+ void (*ABGRToAR30Row)(const uint8_t* src_abgr, uint8_t* dst_rgb, int width) =
+ ABGRToAR30Row_C;
+ if (!src_abgr || !dst_ar30 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
+ }
+ // Coalesce rows.
+ if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_abgr = dst_stride_ar30 = 0;
+ }
+#if defined(HAS_ABGRTOAR30ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToAR30Row = ABGRToAR30Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ABGRToAR30Row = ABGRToAR30Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOAR30ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToAR30Row = ABGRToAR30Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ABGRToAR30Row = ABGRToAR30Row_AVX2;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ ABGRToAR30Row(src_abgr, dst_ar30, width);
+ src_abgr += src_stride_abgr;
+ dst_ar30 += dst_stride_ar30;
+ }
+ return 0;
+}
+
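// Sketch of the AR30 word these functions produce (layout assumed from the
// libyuv format docs: 10-bit B in the low bits, 2-bit alpha on top). 8-bit
// channels widen by bit replication, matching Replicate8To10 earlier.
static inline uint32_t PackAR30(uint8_t b, uint8_t g, uint8_t r, uint8_t a) {
  uint32_t b10 = ((uint32_t)b << 2) | (b >> 6);
  uint32_t g10 = ((uint32_t)g << 2) | (g >> 6);
  uint32_t r10 = ((uint32_t)r << 2) | (r >> 6);
  return b10 | (g10 << 10) | (r10 << 20) | ((uint32_t)(a >> 6) << 30);
}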
// Convert ARGB To AR30.
LIBYUV_API
-int ARGBToAR30(const uint8* src_argb,
+int ARGBToAR30(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_ar30,
+ uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height) {
int y;
- void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
+ void (*ARGBToAR30Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
ARGBToAR30Row_C;
if (!src_argb || !dst_ar30 || width <= 0 || height == 0) {
return -1;
@@ -1263,20 +1333,21 @@ int ARGBToAR30(const uint8* src_argb,
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
-int ARGBToJ420(const uint8* src_argb,
+int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yj,
+ uint8_t* dst_yj,
int dst_stride_yj,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
+ void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVJRow_C;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -1356,20 +1427,21 @@ int ARGBToJ420(const uint8* src_argb,
// Convert ARGB to J422. (JPeg full range I422).
LIBYUV_API
-int ARGBToJ422(const uint8* src_argb,
+int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yj,
+ uint8_t* dst_yj,
int dst_stride_yj,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
+ void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVJRow_C;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
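// Sketch of the J vs non-J distinction, assuming the usual fixed-point BT.601
// coefficients: ARGBToYRow targets studio range (16..235) while ARGBToYJRow
// targets full range (0..255) with no offset.
static inline uint8_t RGBToY601(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);  // 16..235
}
static inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((77 * r + 150 * g + 29 * b + 128) >> 8);  // 0..255
}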
@@ -1451,14 +1523,14 @@ int ARGBToJ422(const uint8* src_argb,
// Convert ARGB to J400.
LIBYUV_API
-int ARGBToJ400(const uint8* src_argb,
+int ARGBToJ400(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_yj,
+ uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height) {
int y;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || width <= 0 || height == 0) {
return -1;
diff --git a/chromium/third_party/libyuv/source/convert_jpeg.cc b/chromium/third_party/libyuv/source/convert_jpeg.cc
index 216a9f26d87..c91b43dc226 100644
--- a/chromium/third_party/libyuv/source/convert_jpeg.cc
+++ b/chromium/third_party/libyuv/source/convert_jpeg.cc
@@ -22,18 +22,18 @@ extern "C" {
#ifdef HAVE_JPEG
struct I420Buffers {
- uint8* y;
+ uint8_t* y;
int y_stride;
- uint8* u;
+ uint8_t* u;
int u_stride;
- uint8* v;
+ uint8_t* v;
int v_stride;
int w;
int h;
};
static void JpegCopyI420(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
@@ -47,7 +47,7 @@ static void JpegCopyI420(void* opaque,
}
static void JpegI422ToI420(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
@@ -61,7 +61,7 @@ static void JpegI422ToI420(void* opaque,
}
static void JpegI444ToI420(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
@@ -75,7 +75,7 @@ static void JpegI444ToI420(void* opaque,
}
static void JpegI400ToI420(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
@@ -89,7 +89,10 @@ static void JpegI400ToI420(void* opaque,
// Query size of MJPG in pixels.
LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height) {
+int MJPGSize(const uint8_t* sample,
+ size_t sample_size,
+ int* width,
+ int* height) {
MJpegDecoder mjpeg_decoder;
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
if (ret) {
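// Usage sketch (hypothetical, tightly packed destination): query the frame
// size first, then decode straight into an I420 buffer of
// w*h + 2*((w+1)/2)*((h+1)/2) bytes supplied by the caller.
static int DecodeMjpgToI420(const uint8_t* sample, size_t sample_size,
                            uint8_t* dst) {
  int w = 0;
  int h = 0;
  if (MJPGSize(sample, sample_size, &w, &h) != 0) {
    return -1;
  }
  int halfw = (w + 1) / 2;
  uint8_t* u = dst + (size_t)w * h;
  uint8_t* v = u + (size_t)halfw * ((h + 1) / 2);
  return MJPGToI420(sample, sample_size, dst, w, u, halfw, v, halfw, w, h, w,
                    h);
}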
@@ -103,13 +106,13 @@ int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height) {
// MJPG (Motion JPeg) to I420
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
-int MJPGToI420(const uint8* sample,
+int MJPGToI420(const uint8_t* sample,
size_t sample_size,
- uint8* y,
+ uint8_t* y,
int y_stride,
- uint8* u,
+ uint8_t* u,
int u_stride,
- uint8* v,
+ uint8_t* v,
int v_stride,
int w,
int h,
@@ -183,14 +186,14 @@ int MJPGToI420(const uint8* sample,
#ifdef HAVE_JPEG
struct ARGBBuffers {
- uint8* argb;
+ uint8_t* argb;
int argb_stride;
int w;
int h;
};
static void JpegI420ToARGB(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
@@ -201,7 +204,7 @@ static void JpegI420ToARGB(void* opaque,
}
static void JpegI422ToARGB(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
@@ -212,7 +215,7 @@ static void JpegI422ToARGB(void* opaque,
}
static void JpegI444ToARGB(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
@@ -223,7 +226,7 @@ static void JpegI444ToARGB(void* opaque,
}
static void JpegI400ToARGB(void* opaque,
- const uint8* const* data,
+ const uint8_t* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
@@ -235,9 +238,9 @@ static void JpegI400ToARGB(void* opaque,
// MJPG (Motion JPeg) to ARGB
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
-int MJPGToARGB(const uint8* sample,
+int MJPGToARGB(const uint8_t* sample,
size_t sample_size,
- uint8* argb,
+ uint8_t* argb,
int argb_stride,
int w,
int h,
diff --git a/chromium/third_party/libyuv/source/convert_to_argb.cc b/chromium/third_party/libyuv/source/convert_to_argb.cc
index 63a5104b3c7..677e5d56fcc 100644
--- a/chromium/third_party/libyuv/source/convert_to_argb.cc
+++ b/chromium/third_party/libyuv/source/convert_to_argb.cc
@@ -29,10 +29,10 @@ extern "C" {
// sample_size is measured in bytes and is the size of the frame.
// With MJPEG it is the compressed size of the frame.
LIBYUV_API
-int ConvertToARGB(const uint8* sample,
+int ConvertToARGB(const uint8_t* sample,
size_t sample_size,
- uint8* crop_argb,
- int argb_stride,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
int crop_x,
int crop_y,
int src_width,
@@ -40,11 +40,11 @@ int ConvertToARGB(const uint8* sample,
int crop_width,
int crop_height,
enum RotationMode rotation,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
+ uint32_t fourcc) {
+ uint32_t format = CanonicalFourCC(fourcc);
int aligned_src_width = (src_width + 1) & ~1;
- const uint8* src;
- const uint8* src_uv;
+ const uint8_t* src;
+ const uint8_t* src_uv;
int abs_src_height = (src_height < 0) ? -src_height : src_height;
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
@@ -52,17 +52,17 @@ int ConvertToARGB(const uint8* sample,
// One pass rotation is available for some formats. For the rest, convert
// to ARGB (with optional vertical flipping) into a temporary ARGB buffer,
// and then rotate the ARGB to the final destination buffer.
- // For in-place conversion, if destination crop_argb is same as source sample,
+ // For in-place conversion, if destination dst_argb is same as source sample,
// also enable temporary buffer.
LIBYUV_BOOL need_buf =
- (rotation && format != FOURCC_ARGB) || crop_argb == sample;
- uint8* dest_argb = crop_argb;
- int dest_argb_stride = argb_stride;
- uint8* rotate_buffer = NULL;
+ (rotation && format != FOURCC_ARGB) || dst_argb == sample;
+ uint8_t* dest_argb = dst_argb;
+ int dest_dst_stride_argb = dst_stride_argb;
+ uint8_t* rotate_buffer = NULL;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
- if (crop_argb == NULL || sample == NULL || src_width <= 0 ||
- crop_width <= 0 || src_height == 0 || crop_height == 0) {
+ if (dst_argb == NULL || sample == NULL || src_width <= 0 || crop_width <= 0 ||
+ src_height == 0 || crop_height == 0) {
return -1;
}
if (src_height < 0) {
@@ -71,76 +71,76 @@ int ConvertToARGB(const uint8* sample,
if (need_buf) {
int argb_size = crop_width * 4 * abs_crop_height;
- rotate_buffer = (uint8*)malloc(argb_size); /* NOLINT */
+ rotate_buffer = (uint8_t*)malloc(argb_size); /* NOLINT */
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
- crop_argb = rotate_buffer;
- argb_stride = crop_width * 4;
+ dst_argb = rotate_buffer;
+ dst_stride_argb = crop_width * 4;
}
switch (format) {
// Single plane formats
case FOURCC_YUY2:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = YUY2ToARGB(src, aligned_src_width * 2, crop_argb, argb_stride,
+ r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
break;
case FOURCC_UYVY:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = UYVYToARGB(src, aligned_src_width * 2, crop_argb, argb_stride,
+ r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
break;
case FOURCC_24BG:
src = sample + (src_width * crop_y + crop_x) * 3;
- r = RGB24ToARGB(src, src_width * 3, crop_argb, argb_stride, crop_width,
+ r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_RAW:
src = sample + (src_width * crop_y + crop_x) * 3;
- r = RAWToARGB(src, src_width * 3, crop_argb, argb_stride, crop_width,
+ r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_ARGB:
if (!need_buf && !rotation) {
src = sample + (src_width * crop_y + crop_x) * 4;
- r = ARGBToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
- inv_crop_height);
+ r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
}
break;
case FOURCC_BGRA:
src = sample + (src_width * crop_y + crop_x) * 4;
- r = BGRAToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
+ r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + (src_width * crop_y + crop_x) * 4;
- r = ABGRToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
+ r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_RGBA:
src = sample + (src_width * crop_y + crop_x) * 4;
- r = RGBAToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
+ r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_RGBP:
src = sample + (src_width * crop_y + crop_x) * 2;
- r = RGB565ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width,
- inv_crop_height);
+ r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
break;
case FOURCC_RGBO:
src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB1555ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width,
- inv_crop_height);
+ r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
break;
case FOURCC_R444:
src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB4444ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width,
- inv_crop_height);
+ r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
- r = I400ToARGB(src, src_width, crop_argb, argb_stride, crop_width,
+ r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
@@ -148,27 +148,27 @@ int ConvertToARGB(const uint8* sample,
case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
- r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, crop_argb,
- argb_stride, crop_width, inv_crop_height);
+ r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
+ dst_stride_argb, crop_width, inv_crop_height);
break;
case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
// Call NV12 but with u and v parameters swapped.
- r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, crop_argb,
- argb_stride, crop_width, inv_crop_height);
+ r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
+ dst_stride_argb, crop_width, inv_crop_height);
break;
case FOURCC_M420:
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
- r = M420ToARGB(src, src_width, crop_argb, argb_stride, crop_width,
+ r = M420ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YV12: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
+ const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
+ const uint8_t* src_u;
+ const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
if (format == FOURCC_YV12) {
@@ -183,14 +183,14 @@ int ConvertToARGB(const uint8* sample,
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
}
r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
- crop_argb, argb_stride, crop_width, inv_crop_height);
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
}
case FOURCC_J420: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
+ const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
+ const uint8_t* src_u;
+ const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
src_u = sample + src_width * abs_src_height +
@@ -198,15 +198,15 @@ int ConvertToARGB(const uint8* sample,
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
- crop_argb, argb_stride, crop_width, inv_crop_height);
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
if (format == FOURCC_YV16) {
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
@@ -220,14 +220,14 @@ int ConvertToARGB(const uint8* sample,
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
}
r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
- crop_argb, argb_stride, crop_width, inv_crop_height);
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
}
case FOURCC_I444:
case FOURCC_YV24: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
if (format == FOURCC_YV24) {
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
@@ -236,12 +236,12 @@ int ConvertToARGB(const uint8* sample,
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
}
r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
- crop_argb, argb_stride, crop_width, inv_crop_height);
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
}
#ifdef HAVE_JPEG
case FOURCC_MJPG:
- r = MJPGToARGB(sample, sample_size, crop_argb, argb_stride, src_width,
+ r = MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width,
abs_src_height, crop_width, inv_crop_height);
break;
#endif
@@ -251,13 +251,13 @@ int ConvertToARGB(const uint8* sample,
if (need_buf) {
if (!r) {
- r = ARGBRotate(crop_argb, argb_stride, dest_argb, dest_argb_stride,
+ r = ARGBRotate(dst_argb, dst_stride_argb, dest_argb, dest_dst_stride_argb,
crop_width, abs_crop_height, rotation);
}
free(rotate_buffer);
} else if (rotation) {
src = sample + (src_width * crop_y + crop_x) * 4;
- r = ARGBRotate(src, src_width * 4, crop_argb, argb_stride, crop_width,
+ r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height, rotation);
}
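// Usage sketch (hypothetical webcam frame): full-frame YUY2 to ARGB with no
// crop and kRotate0, so the temporary rotate buffer path above is not taken.
static int Yuy2FrameToArgb(const uint8_t* sample, size_t sample_size,
                           uint8_t* dst_argb, int width, int height) {
  return ConvertToARGB(sample, sample_size, dst_argb, width * 4, 0, 0, width,
                       height, width, height, kRotate0, FOURCC_YUY2);
}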
diff --git a/chromium/third_party/libyuv/source/convert_to_i420.cc b/chromium/third_party/libyuv/source/convert_to_i420.cc
index a50689db949..1bed9d6440d 100644
--- a/chromium/third_party/libyuv/source/convert_to_i420.cc
+++ b/chromium/third_party/libyuv/source/convert_to_i420.cc
@@ -25,14 +25,14 @@ extern "C" {
// sample_size is measured in bytes and is the size of the frame.
// With MJPEG it is the compressed size of the frame.
LIBYUV_API
-int ConvertToI420(const uint8* sample,
+int ConvertToI420(const uint8_t* sample,
size_t sample_size,
- uint8* y,
- int y_stride,
- uint8* u,
- int u_stride,
- uint8* v,
- int v_stride,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
int crop_x,
int crop_y,
int src_width,
@@ -40,11 +40,11 @@ int ConvertToI420(const uint8* sample,
int crop_width,
int crop_height,
enum RotationMode rotation,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
+ uint32_t fourcc) {
+ uint32_t format = CanonicalFourCC(fourcc);
int aligned_src_width = (src_width + 1) & ~1;
- const uint8* src;
- const uint8* src_uv;
+ const uint8_t* src;
+ const uint8_t* src_uv;
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
// TODO(nisse): Why allow crop_height < 0?
const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
@@ -52,132 +52,143 @@ int ConvertToI420(const uint8* sample,
LIBYUV_BOOL need_buf =
(rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
format != FOURCC_NV21 && format != FOURCC_YV12) ||
- y == sample;
- uint8* tmp_y = y;
- uint8* tmp_u = u;
- uint8* tmp_v = v;
- int tmp_y_stride = y_stride;
- int tmp_u_stride = u_stride;
- int tmp_v_stride = v_stride;
- uint8* rotate_buffer = NULL;
+ dst_y == sample;
+ uint8_t* tmp_y = dst_y;
+ uint8_t* tmp_u = dst_u;
+ uint8_t* tmp_v = dst_v;
+ int tmp_y_stride = dst_stride_y;
+ int tmp_u_stride = dst_stride_u;
+ int tmp_v_stride = dst_stride_v;
+ uint8_t* rotate_buffer = NULL;
const int inv_crop_height =
(src_height < 0) ? -abs_crop_height : abs_crop_height;
- if (!y || !u || !v || !sample || src_width <= 0 || crop_width <= 0 ||
- src_height == 0 || crop_height == 0) {
+ if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
+ crop_width <= 0 || src_height == 0 || crop_height == 0) {
return -1;
}
// One pass rotation is available for some formats. For the rest, convert
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
// and then rotate the I420 to the final destination buffer.
- // For in-place conversion, if destination y is same as source sample,
+ // For in-place conversion, if destination dst_y is same as source sample,
// also enable temporary buffer.
if (need_buf) {
int y_size = crop_width * abs_crop_height;
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
- rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); /* NOLINT */
+ rotate_buffer = (uint8_t*)malloc(y_size + uv_size * 2); /* NOLINT */
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
- y = rotate_buffer;
- u = y + y_size;
- v = u + uv_size;
- y_stride = crop_width;
- u_stride = v_stride = ((crop_width + 1) / 2);
+ dst_y = rotate_buffer;
+ dst_u = dst_y + y_size;
+ dst_v = dst_u + uv_size;
+ dst_stride_y = crop_width;
+ dst_stride_u = dst_stride_v = ((crop_width + 1) / 2);
}
switch (format) {
// Single plane formats
case FOURCC_YUY2:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = YUY2ToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, v,
- v_stride, crop_width, inv_crop_height);
+ r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_UYVY:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = UYVYToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, v,
- v_stride, crop_width, inv_crop_height);
+ r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_RGBP:
src = sample + (src_width * crop_y + crop_x) * 2;
- r = RGB565ToI420(src, src_width * 2, y, y_stride, u, u_stride, v,
- v_stride, crop_width, inv_crop_height);
+ r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_RGBO:
src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB1555ToI420(src, src_width * 2, y, y_stride, u, u_stride, v,
- v_stride, crop_width, inv_crop_height);
+ r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_R444:
src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB4444ToI420(src, src_width * 2, y, y_stride, u, u_stride, v,
- v_stride, crop_width, inv_crop_height);
+ r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_24BG:
src = sample + (src_width * crop_y + crop_x) * 3;
- r = RGB24ToI420(src, src_width * 3, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_RAW:
src = sample + (src_width * crop_y + crop_x) * 3;
- r = RAWToI420(src, src_width * 3, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_ARGB:
src = sample + (src_width * crop_y + crop_x) * 4;
- r = ARGBToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_BGRA:
src = sample + (src_width * crop_y + crop_x) * 4;
- r = BGRAToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + (src_width * crop_y + crop_x) * 4;
- r = ABGRToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_RGBA:
src = sample + (src_width * crop_y + crop_x) * 4;
- r = RGBAToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
- r = I400ToI420(src, src_width, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, crop_width, inv_crop_height);
break;
// Biplanar formats
case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + (src_width * src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
- r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, y,
- y_stride, u, u_stride, v, v_stride, crop_width,
- inv_crop_height, rotation);
+ r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height, rotation);
break;
case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + (src_width * src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
- // Call NV12 but with u and v parameters swapped.
- r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, y,
- y_stride, v, v_stride, u, u_stride, crop_width,
- inv_crop_height, rotation);
+ // Call NV12 but with dst_u and dst_v parameters swapped.
+ r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
+ dst_stride_y, dst_v, dst_stride_v, dst_u,
+ dst_stride_u, crop_width, inv_crop_height, rotation);
break;
case FOURCC_M420:
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
- r = M420ToI420(src, src_width, y, y_stride, u, u_stride, v, v_stride,
- crop_width, inv_crop_height);
+ r = M420ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, crop_width, inv_crop_height);
break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YV12: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
+ const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
+ const uint8_t* src_u;
+ const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
if (format == FOURCC_YV12) {
@@ -191,16 +202,16 @@ int ConvertToI420(const uint8* sample,
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
}
- r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, y,
- y_stride, u, u_stride, v, v_stride, crop_width,
- inv_crop_height, rotation);
+ r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
+ dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height, rotation);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
if (format == FOURCC_YV16) {
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
@@ -213,16 +224,16 @@ int ConvertToI420(const uint8* sample,
src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
}
- r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, y,
- y_stride, u, u_stride, v, v_stride, crop_width,
- inv_crop_height);
+ r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
+ dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height);
break;
}
case FOURCC_I444:
case FOURCC_YV24: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
if (format == FOURCC_YV24) {
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
@@ -230,15 +241,16 @@ int ConvertToI420(const uint8* sample,
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
}
- r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width, y,
- y_stride, u, u_stride, v, v_stride, crop_width,
- inv_crop_height);
+ r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width,
+ dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height);
break;
}
#ifdef HAVE_JPEG
case FOURCC_MJPG:
- r = MJPGToI420(sample, sample_size, y, y_stride, u, u_stride, v, v_stride,
- src_width, abs_src_height, crop_width, inv_crop_height);
+ r = MJPGToI420(sample, sample_size, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, src_width,
+ abs_src_height, crop_width, inv_crop_height);
break;
#endif
default:
@@ -247,9 +259,10 @@ int ConvertToI420(const uint8* sample,
if (need_buf) {
if (!r) {
- r = I420Rotate(y, y_stride, u, u_stride, v, v_stride, tmp_y, tmp_y_stride,
- tmp_u, tmp_u_stride, tmp_v, tmp_v_stride, crop_width,
- abs_crop_height, rotation);
+ r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride,
+ tmp_v, tmp_v_stride, crop_width, abs_crop_height,
+ rotation);
}
free(rotate_buffer);
}
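// Usage sketch (hypothetical NV21 camera frame): NV21 is one of the formats
// the switch above rotates in a single pass, so need_buf stays false even
// when a rotation other than kRotate0 is requested.
static int Nv21FrameToI420(const uint8_t* sample, size_t sample_size,
                           uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v,
                           int w, int h) {
  int halfw = (w + 1) / 2;
  return ConvertToI420(sample, sample_size, dst_y, w, dst_u, halfw, dst_v,
                       halfw, 0, 0, w, h, w, h, kRotate0, FOURCC_NV21);
}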
diff --git a/chromium/third_party/libyuv/source/cpu_id.cc b/chromium/third_party/libyuv/source/cpu_id.cc
index d08fc365988..446aad12078 100644
--- a/chromium/third_party/libyuv/source/cpu_id.cc
+++ b/chromium/third_party/libyuv/source/cpu_id.cc
@@ -27,8 +27,6 @@
#include <stdio.h>
#include <string.h>
-#include "libyuv/basic_types.h" // For CPU_X86
-
#ifdef __cplusplus
namespace libyuv {
extern "C" {
@@ -218,7 +216,9 @@ static LIBYUV_BOOL TestEnv(const char*) {
static SAFEBUFFERS int GetCpuFlags(void) {
int cpu_info = 0;
-#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+ (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
+ defined(_M_IX86))
int cpu_info0[4] = {0, 0, 0, 0};
int cpu_info1[4] = {0, 0, 0, 0};
int cpu_info7[4] = {0, 0, 0, 0};
diff --git a/chromium/third_party/libyuv/source/mjpeg_decoder.cc b/chromium/third_party/libyuv/source/mjpeg_decoder.cc
index b43c008bdd2..eaf2530130b 100644
--- a/chromium/third_party/libyuv/source/mjpeg_decoder.cc
+++ b/chromium/third_party/libyuv/source/mjpeg_decoder.cc
@@ -102,7 +102,7 @@ MJpegDecoder::~MJpegDecoder() {
DestroyOutputBuffers();
}
-LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
+LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
if (!ValidateJpeg(src, src_len)) {
return LIBYUV_FALSE;
}
@@ -129,7 +129,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
if (scanlines_[i]) {
delete scanlines_[i];
}
- scanlines_[i] = new uint8*[scanlines_size];
+ scanlines_[i] = new uint8_t*[scanlines_size];
scanlines_sizes_[i] = scanlines_size;
}
@@ -145,7 +145,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
if (databuf_[i]) {
delete databuf_[i];
}
- databuf_[i] = new uint8[databuf_size];
+ databuf_[i] = new uint8_t[databuf_size];
databuf_strides_[i] = databuf_stride;
}
@@ -243,7 +243,7 @@ LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
}
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
-LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8** planes,
+LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8_t** planes,
int dst_width,
int dst_height) {
if (dst_width != GetWidth() || dst_height > GetHeight()) {
@@ -469,9 +469,9 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
// it.
DestroyOutputBuffers();
- scanlines_ = new uint8**[num_outbufs];
+ scanlines_ = new uint8_t**[num_outbufs];
scanlines_sizes_ = new int[num_outbufs];
- databuf_ = new uint8*[num_outbufs];
+ databuf_ = new uint8_t*[num_outbufs];
databuf_strides_ = new int[num_outbufs];
for (int i = 0; i < num_outbufs; ++i) {
@@ -527,9 +527,9 @@ LIBYUV_BOOL MJpegDecoder::FinishDecode() {
return LIBYUV_TRUE;
}
-void MJpegDecoder::SetScanlinePointers(uint8** data) {
+void MJpegDecoder::SetScanlinePointers(uint8_t** data) {
for (int i = 0; i < num_outbufs_; ++i) {
- uint8* data_i = data[i];
+ uint8_t* data_i = data[i];
for (int j = 0; j < scanlines_sizes_[i]; ++j) {
scanlines_[i][j] = data_i;
data_i += GetComponentStride(i);
@@ -552,13 +552,13 @@ JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) {
return kJpegYuv420;
- } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 2 && subsample_y[1] == 1 &&
- subsample_x[2] == 2 && subsample_y[2] == 1) {
+ }
+ if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
+ subsample_y[1] == 1 && subsample_x[2] == 2 && subsample_y[2] == 1) {
return kJpegYuv422;
- } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 1 && subsample_y[1] == 1 &&
- subsample_x[2] == 1 && subsample_y[2] == 1) {
+ }
+ if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 1 &&
+ subsample_y[1] == 1 && subsample_x[2] == 1 && subsample_y[2] == 1) {
return kJpegYuv444;
}
} else if (number_of_components == 1) { // Grey-scale images.
diff --git a/chromium/third_party/libyuv/source/mjpeg_validate.cc b/chromium/third_party/libyuv/source/mjpeg_validate.cc
index bd760425359..80c2cc0cb9b 100644
--- a/chromium/third_party/libyuv/source/mjpeg_validate.cc
+++ b/chromium/third_party/libyuv/source/mjpeg_validate.cc
@@ -18,13 +18,13 @@ extern "C" {
#endif
// Helper function to scan for EOI marker (0xff 0xd9).
-static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
+static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) {
if (sample_size >= 2) {
- const uint8* end = sample + sample_size - 1;
- const uint8* it = sample;
+ const uint8_t* end = sample + sample_size - 1;
+ const uint8_t* it = sample;
while (it < end) {
// TODO(fbarchard): scan for 0xd9 instead.
- it = (const uint8*)(memchr(it, 0xff, end - it));
+ it = (const uint8_t*)(memchr(it, 0xff, end - it));
if (it == NULL) {
break;
}
@@ -39,7 +39,7 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
}
 // Helper function to validate that the jpeg appears intact.
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
+LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size) {
// Maximum size that ValidateJpeg will consider valid.
const size_t kMaxJpegSize = 0x7fffffffull;
const size_t kBackSearchSize = 1024;
diff --git a/chromium/third_party/libyuv/source/planar_functions.cc b/chromium/third_party/libyuv/source/planar_functions.cc
index c55ef7f2742..77d71633f57 100644
--- a/chromium/third_party/libyuv/source/planar_functions.cc
+++ b/chromium/third_party/libyuv/source/planar_functions.cc
@@ -26,14 +26,14 @@ extern "C" {
// Copy a plane of data
LIBYUV_API
-void CopyPlane(const uint8* src_y,
+void CopyPlane(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
int y;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+ void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -83,14 +83,14 @@ void CopyPlane(const uint8* src_y,
// TODO(fbarchard): Consider support for negative height.
// TODO(fbarchard): Consider stride measured in bytes.
LIBYUV_API
-void CopyPlane_16(const uint16* src_y,
+void CopyPlane_16(const uint16_t* src_y,
int src_stride_y,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
int width,
int height) {
int y;
- void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
+ void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C;
// Coalesce rows.
if (src_stride_y == width && dst_stride_y == width) {
width *= height;
@@ -123,15 +123,15 @@ void CopyPlane_16(const uint16* src_y,
// Convert a plane of 16 bit data to 8 bit
LIBYUV_API
-void Convert16To8Plane(const uint16* src_y,
+void Convert16To8Plane(const uint16_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int scale, // 16384 for 10 bits
int width,
int height) {
int y;
- void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale,
+ void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
int width) = Convert16To8Row_C;
// Negative height means invert the image.
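// Sketch of the scale convention, assuming the row function computes
// clamp255((v * scale) >> 16): for 10-bit input, scale 16384 gives
// (1023 * 16384) >> 16 = 255, i.e. an effective v >> 2.
static inline uint8_t Convert16To8Pixel(uint16_t v, int scale) {
  uint32_t out = ((uint32_t)v * (uint32_t)scale) >> 16;
  return (uint8_t)(out > 255 ? 255 : out);  // clamp255
}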
@@ -173,15 +173,15 @@ void Convert16To8Plane(const uint16* src_y,
// Convert a plane of 8 bit data to 16 bit
LIBYUV_API
-void Convert8To16Plane(const uint8* src_y,
+void Convert8To16Plane(const uint8_t* src_y,
int src_stride_y,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
int scale, // 16384 for 10 bits
int width,
int height) {
int y;
- void (*Convert8To16Row)(const uint8* src_y, uint16* dst_y, int scale,
+ void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
int width) = Convert8To16Row_C;
// Negative height means invert the image.
@@ -223,17 +223,17 @@ void Convert8To16Plane(const uint8* src_y,
// Copy I422.
LIBYUV_API
-int I422Copy(const uint8* src_y,
+int I422Copy(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -262,17 +262,17 @@ int I422Copy(const uint8* src_y,
// Copy I444.
LIBYUV_API
-int I444Copy(const uint8* src_y,
+int I444Copy(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -300,9 +300,9 @@ int I444Copy(const uint8* src_y,
// Copy I400.
LIBYUV_API
-int I400ToI400(const uint8* src_y,
+int I400ToI400(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
@@ -321,13 +321,13 @@ int I400ToI400(const uint8* src_y,
// Convert I420 to I400.
LIBYUV_API
-int I420ToI400(const uint8* src_y,
+int I420ToI400(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
@@ -352,16 +352,16 @@ int I420ToI400(const uint8* src_y,
// Support function for NV12 etc UV channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
-void SplitUVPlane(const uint8* src_uv,
+void SplitUVPlane(const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
// Negative height means invert the image.
if (height < 0) {
@@ -421,17 +421,17 @@ void SplitUVPlane(const uint8* src_uv,
}
LIBYUV_API
-void MergeUVPlane(const uint8* src_u,
+void MergeUVPlane(const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
- void (*MergeUVRow)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
+ void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
+ uint8_t* dst_uv, int width) = MergeUVRow_C;
// Coalesce rows.
// Negative height means invert the image.
if (height < 0) {
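SplitUVPlane and MergeUVPlane are the NV12 workhorses: chroma is stored interleaved U,V,U,V..., so the portable rows are plain even/odd byte moves. A sketch of the pair (names are illustrative):

    #include <stdint.h>

    static void SplitUVRowSketch(const uint8_t* src_uv, uint8_t* dst_u,
                                 uint8_t* dst_v, int width) {
      int x;
      for (x = 0; x < width; ++x) {
        dst_u[x] = src_uv[2 * x];      /* even bytes hold U */
        dst_v[x] = src_uv[2 * x + 1];  /* odd bytes hold V */
      }
    }

    static void MergeUVRowSketch(const uint8_t* src_u, const uint8_t* src_v,
                                 uint8_t* dst_uv, int width) {
      int x;
      for (x = 0; x < width; ++x) {
        dst_uv[2 * x] = src_u[x];
        dst_uv[2 * x + 1] = src_v[x];
      }
    }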
@@ -491,19 +491,19 @@ void MergeUVPlane(const uint8* src_u,
// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
-void SplitRGBPlane(const uint8* src_rgb,
+void SplitRGBPlane(const uint8_t* src_rgb,
int src_stride_rgb,
- uint8* dst_r,
+ uint8_t* dst_r,
int dst_stride_r,
- uint8* dst_g,
+ uint8_t* dst_g,
int dst_stride_g,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
int y;
- void (*SplitRGBRow)(const uint8* src_rgb, uint8* dst_r, uint8* dst_g,
- uint8* dst_b, int width) = SplitRGBRow_C;
+ void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
+ uint8_t* dst_b, int width) = SplitRGBRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -549,19 +549,19 @@ void SplitRGBPlane(const uint8* src_rgb,
}
LIBYUV_API
-void MergeRGBPlane(const uint8* src_r,
+void MergeRGBPlane(const uint8_t* src_r,
int src_stride_r,
- const uint8* src_g,
+ const uint8_t* src_g,
int src_stride_g,
- const uint8* src_b,
+ const uint8_t* src_b,
int src_stride_b,
- uint8* dst_rgb,
+ uint8_t* dst_rgb,
int dst_stride_rgb,
int width,
int height) {
int y;
- void (*MergeRGBRow)(const uint8* src_r, const uint8* src_g,
- const uint8* src_b, uint8* dst_rgb, int width) =
+ void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
+ const uint8_t* src_b, uint8_t* dst_rgb, int width) =
MergeRGBRow_C;
// Coalesce rows.
// Negative height means invert the image.
@@ -605,14 +605,14 @@ void MergeRGBPlane(const uint8* src_r,
}
// Mirror a plane of data.
-void MirrorPlane(const uint8* src_y,
+void MirrorPlane(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
int y;
- void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
+ void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -662,20 +662,20 @@ void MirrorPlane(const uint8* src_y,
// Convert YUY2 to I422.
LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2,
+int YUY2ToI422(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
- int width) = YUY2ToUV422Row_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
+ void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
YUY2ToYRow_C;
if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
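YUY2 packs two pixels into four bytes as Y0 U Y1 V, so luma lives on even bytes and the shared chroma pair on bytes 1 and 3 of each group; UYVY below is the same idea with byte order U Y0 V Y1. A sketch of the two row functions this dispatches to (odd widths are handled by the _Any variants in the real code):

    #include <stdint.h>

    static void YUY2ToYRowSketch(const uint8_t* src_yuy2, uint8_t* dst_y,
                                 int width) {
      int x;
      for (x = 0; x < width; ++x) {
        dst_y[x] = src_yuy2[2 * x];  /* luma on even bytes */
      }
    }

    static void YUY2ToUV422RowSketch(const uint8_t* src_yuy2, uint8_t* dst_u,
                                     uint8_t* dst_v, int width) {
      int x;
      for (x = 0; x + 1 < width; x += 2) {  /* one U,V per 2 pixels */
        dst_u[x / 2] = src_yuy2[2 * x + 1];
        dst_v[x / 2] = src_yuy2[2 * x + 3];
      }
    }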
@@ -748,20 +748,20 @@ int YUY2ToI422(const uint8* src_yuy2,
// Convert UYVY to I422.
LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy,
+int UYVYToI422(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
- void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
- int width) = UYVYToUV422Row_C;
- void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int width) =
+ void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = UYVYToUV422Row_C;
+ void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
UYVYToYRow_C;
if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@@ -834,14 +834,14 @@ int UYVYToI422(const uint8* src_uyvy,
// Convert YUY2 to Y.
LIBYUV_API
-int YUY2ToY(const uint8* src_yuy2,
+int YUY2ToY(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
int y;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
YUY2ToYRow_C;
if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
return -1;
@@ -901,9 +901,9 @@ int YUY2ToY(const uint8* src_yuy2,
// Mirror I400 with optional flipping
LIBYUV_API
-int I400Mirror(const uint8* src_y,
+int I400Mirror(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
@@ -923,17 +923,17 @@ int I400Mirror(const uint8* src_y,
// Mirror I420 with optional flipping
LIBYUV_API
-int I420Mirror(const uint8* src_y,
+int I420Mirror(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
@@ -965,14 +965,14 @@ int I420Mirror(const uint8* src_y,
// ARGB mirror.
LIBYUV_API
-int ARGBMirror(const uint8* src_argb,
+int ARGBMirror(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
+ void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
ARGBMirrorRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -1030,8 +1030,8 @@ int ARGBMirror(const uint8* src_argb,
// the same blend function for all pixels if possible.
LIBYUV_API
ARGBBlendRow GetARGBBlend() {
- void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width) = ARGBBlendRow_C;
+ void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
+ uint8_t* dst_argb, int width) = ARGBBlendRow_C;
#if defined(HAS_ARGBBLENDROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBBlendRow = ARGBBlendRow_SSSE3;
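GetARGBBlend exposes the dispatch pattern used throughout these files: start from the portable C row, then promote the function pointer once per call according to runtime CPU flags, so the hot loop never branches on features. A hypothetical distilled form:

    #include <stdint.h>

    typedef void (*BlendRowFunc)(const uint8_t* src0, const uint8_t* src1,
                                 uint8_t* dst, int width);

    /* Resolve the row implementation once; callers then loop over rows. */
    static BlendRowFunc PickBlendRow(BlendRowFunc c_row, BlendRowFunc simd_row,
                                     int has_simd_feature) {
      return has_simd_feature ? simd_row : c_row;
    }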
@@ -1053,17 +1053,17 @@ ARGBBlendRow GetARGBBlend() {
// Alpha Blend 2 ARGB images and store to destination.
LIBYUV_API
-int ARGBBlend(const uint8* src_argb0,
+int ARGBBlend(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width) = GetARGBBlend();
+ void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
+ uint8_t* dst_argb, int width) = GetARGBBlend();
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1092,19 +1092,19 @@ int ARGBBlend(const uint8* src_argb0,
// Alpha Blend plane and store to destination.
LIBYUV_API
-int BlendPlane(const uint8* src_y0,
+int BlendPlane(const uint8_t* src_y0,
int src_stride_y0,
- const uint8* src_y1,
+ const uint8_t* src_y1,
int src_stride_y1,
- const uint8* alpha,
+ const uint8_t* alpha,
int alpha_stride,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
int y;
- void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
- const uint8* alpha, uint8* dst, int width) =
+ void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
+ const uint8_t* alpha, uint8_t* dst, int width) =
BlendPlaneRow_C;
if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
return -1;
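Per pixel, BlendPlane is a weighted average steered by the alpha plane. The real row functions use slightly different fixed-point rounding; the plain form is:

    #include <stdint.h>

    static void BlendPlaneRowSketch(const uint8_t* src0, const uint8_t* src1,
                                    const uint8_t* alpha, uint8_t* dst,
                                    int width) {
      int x;
      for (x = 0; x < width; ++x) {
        int a = alpha[x];  /* 255 selects src0, 0 selects src1 */
        dst[x] = (uint8_t)((src0[x] * a + src1[x] * (255 - a) + 127) / 255);
      }
    }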
@@ -1154,36 +1154,36 @@ int BlendPlane(const uint8* src_y0,
#define MAXTWIDTH 2048
// Alpha Blend YUV images and store to destination.
LIBYUV_API
-int I420Blend(const uint8* src_y0,
+int I420Blend(const uint8_t* src_y0,
int src_stride_y0,
- const uint8* src_u0,
+ const uint8_t* src_u0,
int src_stride_u0,
- const uint8* src_v0,
+ const uint8_t* src_v0,
int src_stride_v0,
- const uint8* src_y1,
+ const uint8_t* src_y1,
int src_stride_y1,
- const uint8* src_u1,
+ const uint8_t* src_u1,
int src_stride_u1,
- const uint8* src_v1,
+ const uint8_t* src_v1,
int src_stride_v1,
- const uint8* alpha,
+ const uint8_t* alpha,
int alpha_stride,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
// Half width/height for UV.
int halfwidth = (width + 1) >> 1;
- void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
- const uint8* alpha, uint8* dst, int width) =
+ void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
+ const uint8_t* alpha, uint8_t* dst, int width) =
BlendPlaneRow_C;
- void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
+ void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
!alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
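I420Blend needs the alpha plane at chroma resolution, which is why ScaleRowDown2Box appears here: each half-resolution alpha sample is the rounded average of a 2x2 block of the full-resolution plane. Sketch:

    #include <stdint.h>

    static void ScaleRowDown2BoxSketch(const uint8_t* src, int src_stride,
                                       uint8_t* dst, int dst_width) {
      const uint8_t* s0 = src;
      const uint8_t* s1 = src + src_stride;  /* the row below */
      int x;
      for (x = 0; x < dst_width; ++x) {
        dst[x] = (uint8_t)((s0[0] + s0[1] + s1[0] + s1[1] + 2) >> 2);
        s0 += 2;
        s1 += 2;
      }
    }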
@@ -1278,17 +1278,17 @@ int I420Blend(const uint8* src_y0,
// Multiply 2 ARGB images and store to destination.
LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0,
+int ARGBMultiply(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBMultiplyRow_C;
+ void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
+ uint8_t* dst, int width) = ARGBMultiplyRow_C;
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1350,16 +1350,16 @@ int ARGBMultiply(const uint8* src_argb0,
// Add 2 ARGB images and store to destination.
LIBYUV_API
-int ARGBAdd(const uint8* src_argb0,
+int ARGBAdd(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
+ void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
int width) = ARGBAddRow_C;
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -1427,17 +1427,17 @@ int ARGBAdd(const uint8* src_argb0,
// Subtract 2 ARGB images and store to destination.
LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0,
+int ARGBSubtract(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBSubtractRow_C;
+ void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
+ uint8_t* dst, int width) = ARGBSubtractRow_C;
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1497,20 +1497,20 @@ int ARGBSubtract(const uint8* src_argb0,
return 0;
}
// Convert I422 to RGBA with matrix
-static int I422ToRGBAMatrix(const uint8* src_y,
+static int I422ToRGBAMatrix(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
- void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf,
+ void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
@@ -1567,13 +1567,13 @@ static int I422ToRGBAMatrix(const uint8* src_y,
// Convert I422 to RGBA.
LIBYUV_API
-int I422ToRGBA(const uint8* src_y,
+int I422ToRGBA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_rgba,
+ uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height) {
@@ -1584,13 +1584,13 @@ int I422ToRGBA(const uint8* src_y,
// Convert I422 to BGRA.
LIBYUV_API
-int I422ToBGRA(const uint8* src_y,
+int I422ToBGRA(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_bgra,
+ uint8_t* dst_bgra,
int dst_stride_bgra,
int width,
int height) {
@@ -1603,17 +1603,17 @@ int I422ToBGRA(const uint8* src_y,
// Convert NV12 to RGB565.
LIBYUV_API
-int NV12ToRGB565(const uint8* src_y,
+int NV12ToRGB565(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_rgb565,
+ uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height) {
int y;
void (*NV12ToRGB565Row)(
- const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf,
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -1670,14 +1670,14 @@ int NV12ToRGB565(const uint8* src_y,
// Convert RAW to RGB24.
LIBYUV_API
-int RAWToRGB24(const uint8* src_raw,
+int RAWToRGB24(const uint8_t* src_raw,
int src_stride_raw,
- uint8* dst_rgb24,
+ uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height) {
int y;
- void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) =
+ void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
RAWToRGB24Row_C;
if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
@@ -1728,13 +1728,13 @@ int RAWToRGB24(const uint8* src_raw,
}
LIBYUV_API
-void SetPlane(uint8* dst_y,
+void SetPlane(uint8_t* dst_y,
int dst_stride_y,
int width,
int height,
- uint32 value) {
+ uint32_t value) {
int y;
- void (*SetRow)(uint8 * dst, uint8 value, int width) = SetRow_C;
+ void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
if (height < 0) {
height = -height;
dst_y = dst_y + (height - 1) * dst_stride_y;
@@ -1782,11 +1782,11 @@ void SetPlane(uint8* dst_y,
// Draw a rectangle into I420
LIBYUV_API
-int I420Rect(uint8* dst_y,
+int I420Rect(uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int x,
int y,
@@ -1797,9 +1797,9 @@ int I420Rect(uint8* dst_y,
int value_v) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
- uint8* start_y = dst_y + y * dst_stride_y + x;
- uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
- uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
+ uint8_t* start_y = dst_y + y * dst_stride_y + x;
+ uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
+ uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
value_v < 0 || value_v > 255) {
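The start_u/start_v arithmetic above is the standard 4:2:0 address mapping: one chroma sample covers a 2x2 block of luma, so a luma coordinate (x, y) lands at chroma (x / 2, y / 2). As a helper it would read:

    #include <stdint.h>

    static uint8_t* ChromaAddrSketch(uint8_t* plane, int stride, int x, int y) {
      return plane + (y / 2) * stride + (x / 2);  /* 4:2:0 subsampling */
    }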
@@ -1814,15 +1814,16 @@ int I420Rect(uint8* dst_y,
// Draw a rectangle into ARGB
LIBYUV_API
-int ARGBRect(uint8* dst_argb,
+int ARGBRect(uint8_t* dst_argb,
int dst_stride_argb,
int dst_x,
int dst_y,
int width,
int height,
- uint32 value) {
+ uint32_t value) {
int y;
- void (*ARGBSetRow)(uint8 * dst_argb, uint32 value, int width) = ARGBSetRow_C;
+ void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
+ ARGBSetRow_C;
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
return -1;
}
@@ -1883,15 +1884,15 @@ int ARGBRect(uint8* dst_argb,
// f is foreground pixel premultiplied by alpha
LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb,
+int ARGBAttenuate(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) =
- ARGBAttenuateRow_C;
+ void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ int width) = ARGBAttenuateRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1949,14 +1950,14 @@ int ARGBAttenuate(const uint8* src_argb,
// Convert preattenuated ARGB to unattenuated ARGB.
LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb,
+int ARGBUnattenuate(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
+ void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBUnattenuateRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -2000,14 +2001,14 @@ int ARGBUnattenuate(const uint8* src_argb,
// Convert ARGB to Grayed ARGB.
LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb,
+int ARGBGrayTo(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
+ void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
ARGBGrayRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -2049,16 +2050,16 @@ int ARGBGrayTo(const uint8* src_argb,
// Make a rectangle of ARGB gray scale.
LIBYUV_API
-int ARGBGray(uint8* dst_argb,
+int ARGBGray(uint8_t* dst_argb,
int dst_stride_argb,
int dst_x,
int dst_y,
int width,
int height) {
int y;
- void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
+ void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
ARGBGrayRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
}
@@ -2093,15 +2094,15 @@ int ARGBGray(uint8* dst_argb,
// Make a rectangle of ARGB Sepia tone.
LIBYUV_API
-int ARGBSepia(uint8* dst_argb,
+int ARGBSepia(uint8_t* dst_argb,
int dst_stride_argb,
int dst_x,
int dst_y,
int width,
int height) {
int y;
- void (*ARGBSepiaRow)(uint8 * dst_argb, int width) = ARGBSepiaRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
}
@@ -2137,16 +2138,16 @@ int ARGBSepia(uint8* dst_argb,
// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb,
+int ARGBColorMatrix(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- const int8* matrix_argb,
+ const int8_t* matrix_argb,
int width,
int height) {
int y;
- void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) =
+ void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ const int8_t* matrix_argb, int width) =
ARGBColorMatrixRow_C;
if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
return -1;
@@ -2188,15 +2189,15 @@ int ARGBColorMatrix(const uint8* src_argb,
// Apply a 4x3 matrix to each ARGB pixel.
// Deprecated.
LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb,
+int RGBColorMatrix(uint8_t* dst_argb,
int dst_stride_argb,
- const int8* matrix_rgb,
+ const int8_t* matrix_rgb,
int dst_x,
int dst_y,
int width,
int height) {
- SIMD_ALIGNED(int8 matrix_argb[16]);
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ SIMD_ALIGNED(int8_t matrix_argb[16]);
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
dst_y < 0) {
return -1;
@@ -2218,24 +2219,24 @@ int RGBColorMatrix(uint8* dst_argb,
matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
matrix_argb[15] = 64; // 1.0
- return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, dst,
+ return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
dst_stride_argb, &matrix_argb[0], width, height);
}
// Apply a color table to each ARGB pixel.
// Table contains 256 ARGB values.
LIBYUV_API
-int ARGBColorTable(uint8* dst_argb,
+int ARGBColorTable(uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* table_argb,
+ const uint8_t* table_argb,
int dst_x,
int dst_y,
int width,
int height) {
int y;
- void (*ARGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb,
+ void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
int width) = ARGBColorTableRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
dst_y < 0) {
return -1;
@@ -2261,17 +2262,17 @@ int ARGBColorTable(uint8* dst_argb,
// Apply a color table to each ARGB pixel but preserve destination alpha.
// Table contains 256 ARGB values.
LIBYUV_API
-int RGBColorTable(uint8* dst_argb,
+int RGBColorTable(uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* table_argb,
+ const uint8_t* table_argb,
int dst_x,
int dst_y,
int width,
int height) {
int y;
- void (*RGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb,
+ void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
int width) = RGBColorTableRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
dst_y < 0) {
return -1;
@@ -2304,7 +2305,7 @@ int RGBColorTable(uint8* dst_argb,
// Caveat - although SSE2 saturates, the C function does not and should be used
// with care if doing anything but quantization.
LIBYUV_API
-int ARGBQuantize(uint8* dst_argb,
+int ARGBQuantize(uint8_t* dst_argb,
int dst_stride_argb,
int scale,
int interval_size,
@@ -2314,9 +2315,9 @@ int ARGBQuantize(uint8* dst_argb,
int width,
int height) {
int y;
- void (*ARGBQuantizeRow)(uint8 * dst_argb, int scale, int interval_size,
+ void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
int interval_offset, int width) = ARGBQuantizeRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
interval_size < 1 || interval_size > 255) {
return -1;
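The three quantize parameters implement posterization in fixed point: scale is typically 65536 / interval_size, so (v * scale) >> 16 is a cheap division that picks a bucket, which is then widened back and offset. A per-channel sketch (this one saturates, which per the caveat above the real C row does not):

    #include <stdint.h>

    static uint8_t QuantizeSketch(uint8_t v, int scale, int interval_size,
                                  int interval_offset) {
      int bucket = (v * scale) >> 16;  /* which interval v falls in */
      int out = bucket * interval_size + interval_offset;
      return (uint8_t)(out > 255 ? 255 : out);
    }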
@@ -2352,17 +2353,17 @@ int ARGBQuantize(uint8* dst_argb,
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb,
+int ARGBComputeCumulativeSum(const uint8_t* src_argb,
int src_stride_argb,
- int32* dst_cumsum,
+ int32_t* dst_cumsum,
int dst_stride32_cumsum,
int width,
int height) {
int y;
- void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) =
+ void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
+ const int32_t* previous_cumsum, int width) =
ComputeCumulativeSumRow_C;
- int32* previous_cumsum = dst_cumsum;
+ int32_t* previous_cumsum = dst_cumsum;
if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
return -1;
}
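This is a classic summed-area table: each 32-bit entry holds the sum of all samples above and to the left, built row by row from a running sum plus the row above (the real rows track four sums, one per ARGB channel). A single-channel sketch:

    #include <stdint.h>

    static void CumulativeSumRowSketch(const uint8_t* row, int32_t* cumsum,
                                       const int32_t* previous_cumsum,
                                       int width) {
      int32_t row_sum = 0;
      int x;
      for (x = 0; x < width; ++x) {
        row_sum += row[x];
        cumsum[x] = row_sum + (previous_cumsum ? previous_cumsum[x] : 0);
      }
    }

ARGBBlur can then average any rectangle in constant time from four table corners: sum = bottom_right - bottom_left - top_right + top_left.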
@@ -2386,25 +2387,25 @@ int ARGBComputeCumulativeSum(const uint8* src_argb,
// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
// as the buffer is treated as circular.
LIBYUV_API
-int ARGBBlur(const uint8* src_argb,
+int ARGBBlur(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- int32* dst_cumsum,
+ int32_t* dst_cumsum,
int dst_stride32_cumsum,
int width,
int height,
int radius) {
int y;
- void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) =
+ void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
+ const int32_t* previous_cumsum, int width) =
ComputeCumulativeSumRow_C;
- void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst,
- int count) = CumulativeSumToAverageRow_C;
- int32* cumsum_bot_row;
- int32* max_cumsum_bot_row;
- int32* cumsum_top_row;
+ void (*CumulativeSumToAverageRow)(
+ const int32_t* topleft, const int32_t* botleft, int width, int area,
+ uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
+ int32_t* cumsum_bot_row;
+ int32_t* max_cumsum_bot_row;
+ int32_t* cumsum_top_row;
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
@@ -2458,7 +2459,7 @@ int ARGBBlur(const uint8* src_argb,
// Increment cumsum_bot_row pointer with circular buffer wrap around and
// then fill in a row of CumulativeSum.
if ((y + radius) < height) {
- const int32* prev_cumsum_bot_row = cumsum_bot_row;
+ const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
cumsum_bot_row += dst_stride32_cumsum;
if (cumsum_bot_row >= max_cumsum_bot_row) {
cumsum_bot_row = dst_cumsum;
@@ -2496,16 +2497,16 @@ int ARGBBlur(const uint8* src_argb,
// Multiply ARGB image by a specified ARGB value.
LIBYUV_API
-int ARGBShade(const uint8* src_argb,
+int ARGBShade(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
- uint32 value) {
+ uint32_t value) {
int y;
- void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) = ARGBShadeRow_C;
+ void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
+ uint32_t value) = ARGBShadeRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
return -1;
}
@@ -2546,17 +2547,17 @@ int ARGBShade(const uint8* src_argb,
// Interpolate 2 planes by specified amount (0 to 255).
LIBYUV_API
-int InterpolatePlane(const uint8* src0,
+int InterpolatePlane(const uint8_t* src0,
int src_stride0,
- const uint8* src1,
+ const uint8_t* src1,
int src_stride1,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height,
int interpolation) {
int y;
- void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
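InterpolatePlane is a fixed-point lerp between two planes: interpolation 0 returns src0, 128 is an even average, and the second source is addressed through the stride argument, matching the row prototype above. Sketch (the exact rounding bias is an assumption):

    #include <stddef.h>
    #include <stdint.h>

    static void InterpolateRowSketch(uint8_t* dst, const uint8_t* src,
                                     ptrdiff_t src_stride, int width,
                                     int source_y_fraction) {
      const uint8_t* src1 = src + src_stride;
      int f1 = source_y_fraction;  /* weight of the second source */
      int f0 = 256 - f1;           /* weight of the first source */
      int x;
      for (x = 0; x < width; ++x) {
        dst[x] = (uint8_t)((src[x] * f0 + src1[x] * f1 + 128) >> 8);
      }
    }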
@@ -2618,11 +2619,11 @@ int InterpolatePlane(const uint8* src0,
// Interpolate 2 ARGB images by specified amount (0 to 255).
LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0,
+int ARGBInterpolate(const uint8_t* src_argb0,
int src_stride_argb0,
- const uint8* src_argb1,
+ const uint8_t* src_argb1,
int src_stride_argb1,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
@@ -2634,23 +2635,23 @@ int ARGBInterpolate(const uint8* src_argb0,
// Interpolate 2 YUV images by specified amount (0 to 255).
LIBYUV_API
-int I420Interpolate(const uint8* src0_y,
+int I420Interpolate(const uint8_t* src0_y,
int src0_stride_y,
- const uint8* src0_u,
+ const uint8_t* src0_u,
int src0_stride_u,
- const uint8* src0_v,
+ const uint8_t* src0_v,
int src0_stride_v,
- const uint8* src1_y,
+ const uint8_t* src1_y,
int src1_stride_y,
- const uint8* src1_u,
+ const uint8_t* src1_u,
int src1_stride_u,
- const uint8* src1_v,
+ const uint8_t* src1_v,
int src1_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
@@ -2672,16 +2673,16 @@ int I420Interpolate(const uint8* src0_y,
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra,
+int ARGBShuffle(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* shuffler,
+ const uint8_t* shuffler,
int width,
int height) {
int y;
- void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
- const uint8* shuffler, int width) = ARGBShuffleRow_C;
+ void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
+ const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
return -1;
}
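The shuffler argument is a four-entry byte map applied to every 32-bit pixel; for example {3, 2, 1, 0} reverses the channel order of each pixel, which is how BGRA becomes ARGB. Sketch of the C row:

    #include <stdint.h>

    static void ARGBShuffleRowSketch(const uint8_t* src, uint8_t* dst,
                                     const uint8_t* shuffler, int width) {
      int x, b;
      for (x = 0; x < width; ++x) {
        for (b = 0; b < 4; ++b) {
          dst[4 * x + b] = src[4 * x + shuffler[b]];
        }
      }
    }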
@@ -2739,23 +2740,23 @@ int ARGBShuffle(const uint8* src_bgra,
}
// Sobel ARGB effect.
-static int ARGBSobelize(const uint8* src_argb,
+static int ARGBSobelize(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
- void (*SobelRow)(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst,
+ void (*SobelRow)(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst,
int width)) {
int y;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
ARGBToYJRow_C;
- void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely,
- int width) = SobelYRow_C;
- void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobely, int width) =
+ void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
+ uint8_t* dst_sobely, int width) = SobelYRow_C;
+ void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
+ const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
SobelXRow_C;
const int kEdge = 16; // Extra pixels at start of row for extrude/align.
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
@@ -2835,14 +2836,14 @@ static int ARGBSobelize(const uint8* src_argb,
// 3 rows with edges before/after.
const int kRowSize = (width + kEdge + 31) & ~31;
align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
- uint8* row_sobelx = rows;
- uint8* row_sobely = rows + kRowSize;
- uint8* row_y = rows + kRowSize * 2;
+ uint8_t* row_sobelx = rows;
+ uint8_t* row_sobely = rows + kRowSize;
+ uint8_t* row_y = rows + kRowSize * 2;
// Convert first row.
- uint8* row_y0 = row_y + kEdge;
- uint8* row_y1 = row_y0 + kRowSize;
- uint8* row_y2 = row_y1 + kRowSize;
+ uint8_t* row_y0 = row_y + kEdge;
+ uint8_t* row_y1 = row_y0 + kRowSize;
+ uint8_t* row_y2 = row_y1 + kRowSize;
ARGBToYJRow(src_argb, row_y0, width);
row_y0[-1] = row_y0[0];
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
@@ -2866,7 +2867,7 @@ static int ARGBSobelize(const uint8* src_argb,
// Cycle thru circular queue of 3 row_y buffers.
{
- uint8* row_yt = row_y0;
+ uint8_t* row_yt = row_y0;
row_y0 = row_y1;
row_y1 = row_y2;
row_y2 = row_yt;
@@ -2881,14 +2882,14 @@ static int ARGBSobelize(const uint8* src_argb,
// Sobel ARGB effect.
LIBYUV_API
-int ARGBSobel(const uint8* src_argb,
+int ARGBSobel(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
- void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) = SobelRow_C;
+ void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
+ uint8_t* dst_argb, int width) = SobelRow_C;
#if defined(HAS_SOBELROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelRow = SobelRow_Any_SSE2;
@@ -2919,14 +2920,14 @@ int ARGBSobel(const uint8* src_argb,
// Sobel ARGB effect with planar output.
LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb,
+int ARGBSobelToPlane(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
int width,
int height) {
- void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_, int width) = SobelToPlaneRow_C;
+ void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
+ uint8_t* dst_, int width) = SobelToPlaneRow_C;
#if defined(HAS_SOBELTOPLANEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
@@ -2958,14 +2959,14 @@ int ARGBSobelToPlane(const uint8* src_argb,
// SobelXY ARGB effect.
// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb,
+int ARGBSobelXY(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
- void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) = SobelXYRow_C;
+ void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
+ uint8_t* dst_argb, int width) = SobelXYRow_C;
#if defined(HAS_SOBELXYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelXYRow = SobelXYRow_Any_SSE2;
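Following the comment above (Sobel X in R, Sobel Y in B, G = Sobel), the XY variant packs the two gradient magnitudes and their sum into one ARGB pixel. A sketch, using libyuv's B,G,R,A byte order in memory:

    #include <stdint.h>

    static void SobelXYRowSketch(const uint8_t* src_sobelx,
                                 const uint8_t* src_sobely,
                                 uint8_t* dst_argb, int width) {
      int x;
      for (x = 0; x < width; ++x) {
        int s = src_sobelx[x] + src_sobely[x];
        dst_argb[4 * x + 0] = src_sobely[x];                 /* B: Sobel Y */
        dst_argb[4 * x + 1] = (uint8_t)(s > 255 ? 255 : s);  /* G: sum */
        dst_argb[4 * x + 2] = src_sobelx[x];                 /* R: Sobel X */
        dst_argb[4 * x + 3] = 255;                           /* A: opaque */
      }
    }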
@@ -2996,15 +2997,15 @@ int ARGBSobelXY(const uint8* src_argb,
// Apply a 4x4 polynomial to each ARGB pixel.
LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb,
+int ARGBPolynomial(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
const float* poly,
int width,
int height) {
int y;
- void (*ARGBPolynomialRow)(const uint8* src_argb, uint8* dst_argb,
+ void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
const float* poly, int width) = ARGBPolynomialRow_C;
if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
return -1;
@@ -3044,16 +3045,16 @@ int ARGBPolynomial(const uint8* src_argb,
// Convert plane of 16 bit shorts to half floats.
// Source values are multiplied by scale before storing as half float.
LIBYUV_API
-int HalfFloatPlane(const uint16* src_y,
+int HalfFloatPlane(const uint16_t* src_y,
int src_stride_y,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
float scale,
int width,
int height) {
int y;
- void (*HalfFloatRow)(const uint16* src, uint16* dst, float scale, int width) =
- HalfFloatRow_C;
+ void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
+ int width) = HalfFloatRow_C;
if (!src_y || !dst_y || width <= 0 || height == 0) {
return -1;
}
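A scale such as 1.0f / 1023.0f normalizes 10-bit input to [0, 1] before the half-float store. C implementations of this conversion commonly fold the float-to-half rebias into the multiply: scaling by 2^-112 absorbs the exponent-bias difference (127 vs 15), after which a 13-bit shift aligns sign, exponent, and mantissa. A hedged sketch of that trick, valid for normal, non-overflowing inputs (whether this exact constant is used here is an assumption):

    #include <stdint.h>
    #include <string.h>

    static void HalfFloatRowSketch(const uint16_t* src, uint16_t* dst,
                                   float scale, int width) {
      const float kExpBias = 1.9259299444e-34f;  /* 2^-112 */
      float mult = kExpBias * scale;
      int x;
      for (x = 0; x < width; ++x) {
        float value = (float)src[x] * mult;
        uint32_t bits;
        memcpy(&bits, &value, sizeof(bits));  /* safe type pun */
        dst[x] = (uint16_t)(bits >> 13);      /* 23 - 10 mantissa bits */
      }
    }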
@@ -3124,17 +3125,17 @@ int HalfFloatPlane(const uint16* src_y,
// Apply a lumacolortable to each ARGB pixel.
LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb,
+int ARGBLumaColorTable(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- const uint8* luma,
+ const uint8_t* luma,
int width,
int height) {
int y;
void (*ARGBLumaColorTableRow)(
- const uint8* src_argb, uint8* dst_argb, int width, const uint8* luma,
- const uint32 lumacoeff) = ARGBLumaColorTableRow_C;
+ const uint8_t* src_argb, uint8_t* dst_argb, int width,
+ const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
return -1;
}
@@ -3166,15 +3167,15 @@ int ARGBLumaColorTable(const uint8* src_argb,
// Copy Alpha from one ARGB image to another.
LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb,
+int ARGBCopyAlpha(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
- ARGBCopyAlphaRow_C;
+ void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ int width) = ARGBCopyAlphaRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -3217,10 +3218,10 @@ int ARGBCopyAlpha(const uint8* src_argb,
// Extract just the alpha channel from ARGB.
LIBYUV_API
-int ARGBExtractAlpha(const uint8* src_argb,
- int src_stride,
- uint8* dst_a,
- int dst_stride,
+int ARGBExtractAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_a,
+ int dst_stride_a,
int width,
int height) {
if (!src_argb || !dst_a || width <= 0 || height == 0) {
@@ -3229,17 +3230,17 @@ int ARGBExtractAlpha(const uint8* src_argb,
// Negative height means invert the image.
if (height < 0) {
height = -height;
- src_argb += (height - 1) * src_stride;
- src_stride = -src_stride;
+ src_argb += (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
- if (src_stride == width * 4 && dst_stride == width) {
+ if (src_stride_argb == width * 4 && dst_stride_a == width) {
width *= height;
height = 1;
- src_stride = dst_stride = 0;
+ src_stride_argb = dst_stride_a = 0;
}
- void (*ARGBExtractAlphaRow)(const uint8* src_argb, uint8* dst_a, int width) =
- ARGBExtractAlphaRow_C;
+ void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
+ int width) = ARGBExtractAlphaRow_C;
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
@@ -3267,23 +3268,23 @@ int ARGBExtractAlpha(const uint8* src_argb,
for (int y = 0; y < height; ++y) {
ARGBExtractAlphaRow(src_argb, dst_a, width);
- src_argb += src_stride;
- dst_a += dst_stride;
+ src_argb += src_stride_argb;
+ dst_a += dst_stride_a;
}
return 0;
}
// Copy a planar Y channel to the alpha channel of a destination ARGB image.
LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y,
+int ARGBCopyYToAlpha(const uint8_t* src_y,
int src_stride_y,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
- void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
- ARGBCopyYToAlphaRow_C;
+ void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
+ int width) = ARGBCopyYToAlphaRow_C;
if (!src_y || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -3328,19 +3329,19 @@ int ARGBCopyYToAlpha(const uint8* src_y,
// directly. A SplitUVRow_Odd function could copy the remaining chroma.
LIBYUV_API
-int YUY2ToNV12(const uint8* src_yuy2,
+int YUY2ToNV12(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
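Why SplitUVRow and InterpolateRow appear here: a YUY2 row alternates luma and chroma bytes, so a UV-style split deinterleaves it into a Y row plus a packed UV row, and averaging the UV rows of two adjacent scanlines (InterpolateRow at fraction 128) produces NV12's vertically subsampled chroma. The averaging step alone, as a sketch:

    #include <stdint.h>

    static void AverageUVRowsSketch(const uint8_t* uv_row0,
                                    const uint8_t* uv_row1,
                                    uint8_t* dst_uv, int uv_width_bytes) {
      int x;
      for (x = 0; x < uv_width_bytes; ++x) {
        dst_uv[x] = (uint8_t)((uv_row0[x] + uv_row1[x] + 1) >> 1);
      }
    }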
@@ -3444,19 +3445,19 @@ int YUY2ToNV12(const uint8* src_yuy2,
}
LIBYUV_API
-int UYVYToNV12(const uint8* src_uyvy,
+int UYVYToNV12(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
diff --git a/chromium/third_party/libyuv/source/rotate.cc b/chromium/third_party/libyuv/source/rotate.cc
index 1f74cd0714a..f2bed85b755 100644
--- a/chromium/third_party/libyuv/source/rotate.cc
+++ b/chromium/third_party/libyuv/source/rotate.cc
@@ -22,18 +22,18 @@ extern "C" {
#endif
LIBYUV_API
-void TransposePlane(const uint8* src,
+void TransposePlane(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
int i = height;
#if defined(HAS_TRANSPOSEWX16_MSA)
- void (*TransposeWx16)(const uint8* src, int src_stride, uint8* dst,
+ void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width) = TransposeWx16_C;
#else
- void (*TransposeWx8)(const uint8* src, int src_stride, uint8* dst,
+ void (*TransposeWx8)(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width) = TransposeWx8_C;
#endif
#if defined(HAS_TRANSPOSEWX8_NEON)
@@ -90,9 +90,9 @@ void TransposePlane(const uint8* src,
}
LIBYUV_API
-void RotatePlane90(const uint8* src,
+void RotatePlane90(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
@@ -105,9 +105,9 @@ void RotatePlane90(const uint8* src,
}
LIBYUV_API
-void RotatePlane270(const uint8* src,
+void RotatePlane270(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
@@ -120,20 +120,20 @@ void RotatePlane270(const uint8* src,
}
LIBYUV_API
-void RotatePlane180(const uint8* src,
+void RotatePlane180(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width);
- const uint8* src_bot = src + src_stride * (height - 1);
- uint8* dst_bot = dst + dst_stride * (height - 1);
+ const uint8_t* src_bot = src + src_stride * (height - 1);
+ uint8_t* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
- void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+ void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
+ void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MirrorRow = MirrorRow_Any_NEON;
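The 180-degree rotation works in row pairs from the outside in: mirror the top row into the temporary, mirror the bottom row into the top slot, then copy the saved mirror into the bottom slot; half_height iterations cover the image. A scalar sketch of one pair:

    #include <stdint.h>
    #include <string.h>

    static void Rotate180RowPairSketch(const uint8_t* src_top,
                                       const uint8_t* src_bot,
                                       uint8_t* dst_top, uint8_t* dst_bot,
                                       uint8_t* tmp_row, int width) {
      int x;
      for (x = 0; x < width; ++x) {
        tmp_row[x] = src_top[width - 1 - x];  /* mirrored top row */
      }
      for (x = 0; x < width; ++x) {
        dst_top[x] = src_bot[width - 1 - x];  /* mirrored bottom to top */
      }
      memcpy(dst_bot, tmp_row, width);        /* saved mirror to bottom */
    }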
@@ -201,22 +201,22 @@ void RotatePlane180(const uint8* src,
}
LIBYUV_API
-void TransposeUV(const uint8* src,
+void TransposeUV(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
int i = height;
#if defined(HAS_TRANSPOSEUVWX16_MSA)
- void (*TransposeUVWx16)(const uint8* src, int src_stride, uint8* dst_a,
- int dst_stride_a, uint8* dst_b, int dst_stride_b,
+ void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a,
+ int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
int width) = TransposeUVWx16_C;
#else
- void (*TransposeUVWx8)(const uint8* src, int src_stride, uint8* dst_a,
- int dst_stride_a, uint8* dst_b, int dst_stride_b,
+ void (*TransposeUVWx8)(const uint8_t* src, int src_stride, uint8_t* dst_a,
+ int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
int width) = TransposeUVWx8_C;
#endif
#if defined(HAS_TRANSPOSEUVWX8_NEON)
@@ -270,11 +270,11 @@ void TransposeUV(const uint8* src,
}
LIBYUV_API
-void RotateUV90(const uint8* src,
+void RotateUV90(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
@@ -286,11 +286,11 @@ void RotateUV90(const uint8* src,
}
LIBYUV_API
-void RotateUV270(const uint8* src,
+void RotateUV270(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
@@ -305,17 +305,17 @@ void RotateUV270(const uint8* src,
// Rotate 180 is a horizontal and vertical flip.
LIBYUV_API
-void RotateUV180(const uint8* src,
+void RotateUV180(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
int i;
- void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
- MirrorUVRow_C;
+ void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
+ int width) = MirrorUVRow_C;
#if defined(HAS_MIRRORUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
MirrorUVRow = MirrorUVRow_NEON;
@@ -344,9 +344,9 @@ void RotateUV180(const uint8* src,
}
LIBYUV_API
-int RotatePlane(const uint8* src,
+int RotatePlane(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height,
@@ -383,17 +383,17 @@ int RotatePlane(const uint8* src,
}
LIBYUV_API
-int I420Rotate(const uint8* src_y,
+int I420Rotate(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
@@ -451,15 +451,15 @@ int I420Rotate(const uint8* src_y,
}
LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y,
+int NV12ToI420Rotate(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_uv,
+ const uint8_t* src_uv,
int src_stride_uv,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
diff --git a/chromium/third_party/libyuv/source/rotate_any.cc b/chromium/third_party/libyuv/source/rotate_any.cc
index eb4f7418660..c2752e6222c 100644
--- a/chromium/third_party/libyuv/source/rotate_any.cc
+++ b/chromium/third_party/libyuv/source/rotate_any.cc
@@ -19,8 +19,8 @@ extern "C" {
#endif
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
- void NAMEANY(const uint8* src, int src_stride, uint8* dst, int dst_stride, \
- int width) { \
+ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \
+ int dst_stride, int width) { \
int r = width & MASK; \
int n = width - r; \
if (n > 0) { \
@@ -44,8 +44,9 @@ TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
#undef TANY
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
- void NAMEANY(const uint8* src, int src_stride, uint8* dst_a, \
- int dst_stride_a, uint8* dst_b, int dst_stride_b, int width) { \
+ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
+ int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \
+ int width) { \
int r = width & MASK; \
int n = width - r; \
if (n > 0) { \
diff --git a/chromium/third_party/libyuv/source/rotate_argb.cc b/chromium/third_party/libyuv/source/rotate_argb.cc
index f6a2bf69f94..f13b041f88b 100644
--- a/chromium/third_party/libyuv/source/rotate_argb.cc
+++ b/chromium/third_party/libyuv/source/rotate_argb.cc
@@ -22,43 +22,41 @@ extern "C" {
// ARGBScale has a function to copy pixels to a row, striding each source
// pixel by a constant.
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || \
- (defined(__x86_64__) && !defined(__native_client__)) || \
- defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr,
+void ScaleARGBRowDownEven_SSE2(const uint8_t* src_ptr,
int src_stride,
int src_stepx,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
#endif
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBROWDOWNEVEN_NEON
-void ScaleARGBRowDownEven_NEON(const uint8* src_ptr,
+void ScaleARGBRowDownEven_NEON(const uint8_t* src_ptr,
int src_stride,
int src_stepx,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
#endif
-void ScaleARGBRowDownEven_C(const uint8* src_ptr,
+void ScaleARGBRowDownEven_C(const uint8_t* src_ptr,
int,
int src_stepx,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width);
-static void ARGBTranspose(const uint8* src,
+static void ARGBTranspose(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
int i;
int src_pixel_step = src_stride >> 2;
- void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
- int src_step, uint8* dst_ptr, int dst_width) =
+ void (*ScaleARGBRowDownEven)(const uint8_t* src_ptr, int src_stride,
+ int src_step, uint8_t* dst_ptr, int dst_width) =
ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
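The trick behind ARGBTranspose: with src_pixel_step = src_stride >> 2 (byte stride over 4-byte pixels), stepping through the source by that many ARGB pixels walks down one column, which ScaleARGBRowDownEven then emits as a contiguous destination row. A sketch of the column read (assumes 4-byte-aligned ARGB buffers):

    #include <stdint.h>

    static void ReadColumnAsRowSketch(const uint8_t* src, int src_pixel_step,
                                      uint8_t* dst_row, int dst_width) {
      const uint32_t* s = (const uint32_t*)src;  /* 4 bytes per pixel */
      uint32_t* d = (uint32_t*)dst_row;
      int i;
      for (i = 0; i < dst_width; ++i) {
        d[i] = s[i * src_pixel_step];
      }
    }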
@@ -78,9 +76,9 @@ static void ARGBTranspose(const uint8* src,
}
}
-void ARGBRotate90(const uint8* src,
+void ARGBRotate90(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
@@ -92,9 +90,9 @@ void ARGBRotate90(const uint8* src,
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
-void ARGBRotate270(const uint8* src,
+void ARGBRotate270(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
@@ -106,21 +104,21 @@ void ARGBRotate270(const uint8* src,
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
-void ARGBRotate180(const uint8* src,
+void ARGBRotate180(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width * 4);
- const uint8* src_bot = src + src_stride * (height - 1);
- uint8* dst_bot = dst + dst_stride * (height - 1);
+ const uint8_t* src_bot = src + src_stride * (height - 1);
+ uint8_t* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
- void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
+ void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
ARGBMirrorRow_C;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+ void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
@@ -188,9 +186,9 @@ void ARGBRotate180(const uint8* src,
}
LIBYUV_API
-int ARGBRotate(const uint8* src_argb,
+int ARGBRotate(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
diff --git a/chromium/third_party/libyuv/source/rotate_common.cc b/chromium/third_party/libyuv/source/rotate_common.cc
index 89357e732d2..ff212adebc4 100644
--- a/chromium/third_party/libyuv/source/rotate_common.cc
+++ b/chromium/third_party/libyuv/source/rotate_common.cc
@@ -16,9 +16,9 @@ namespace libyuv {
extern "C" {
#endif
-void TransposeWx8_C(const uint8* src,
+void TransposeWx8_C(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
int i;
@@ -36,11 +36,11 @@ void TransposeWx8_C(const uint8* src,
}
}
-void TransposeUVWx8_C(const uint8* src,
+void TransposeUVWx8_C(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width) {
int i;
@@ -67,9 +67,9 @@ void TransposeUVWx8_C(const uint8* src,
}
}
-void TransposeWxH_C(const uint8* src,
+void TransposeWxH_C(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width,
int height) {
@@ -82,11 +82,11 @@ void TransposeWxH_C(const uint8* src,
}
}
-void TransposeUVWxH_C(const uint8* src,
+void TransposeUVWxH_C(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
diff --git a/chromium/third_party/libyuv/source/rotate_gcc.cc b/chromium/third_party/libyuv/source/rotate_gcc.cc
index 74b48ac4084..04e19e29eef 100644
--- a/chromium/third_party/libyuv/source/rotate_gcc.cc
+++ b/chromium/third_party/libyuv/source/rotate_gcc.cc
@@ -22,9 +22,9 @@ extern "C" {
// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
#if defined(HAS_TRANSPOSEWX8_SSSE3)
-void TransposeWx8_SSSE3(const uint8* src,
+void TransposeWx8_SSSE3(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
asm volatile(
@@ -112,9 +112,9 @@ void TransposeWx8_SSSE3(const uint8* src,
// Transpose 16x8. 64 bit
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
-void TransposeWx8_Fast_SSSE3(const uint8* src,
+void TransposeWx8_Fast_SSSE3(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
asm volatile(
@@ -255,11 +255,11 @@ void TransposeWx8_Fast_SSSE3(const uint8* src,
// Transpose UV 8x8. 64 bit.
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
-void TransposeUVWx8_SSE2(const uint8* src,
+void TransposeUVWx8_SSE2(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width) {
asm volatile(
diff --git a/chromium/third_party/libyuv/source/rotate_msa.cc b/chromium/third_party/libyuv/source/rotate_msa.cc
index 8907765aba7..99bdca65b32 100644
--- a/chromium/third_party/libyuv/source/rotate_msa.cc
+++ b/chromium/third_party/libyuv/source/rotate_msa.cc
@@ -51,9 +51,9 @@ extern "C" {
out3 = (v16u8)__msa_ilvl_d((v2i64)in3, (v2i64)in2); \
}
-void TransposeWx16_C(const uint8* src,
+void TransposeWx16_C(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
TransposeWx8_C(src, src_stride, dst, dst_stride, width);
@@ -61,11 +61,11 @@ void TransposeWx16_C(const uint8* src,
width);
}
-void TransposeUVWx16_C(const uint8* src,
+void TransposeUVWx16_C(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width) {
TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
@@ -74,13 +74,13 @@ void TransposeUVWx16_C(const uint8* src,
dst_stride_a, (dst_b + 8), dst_stride_b, width);
}
-void TransposeWx16_MSA(const uint8* src,
+void TransposeWx16_MSA(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
int x;
- const uint8* s;
+ const uint8_t* s;
v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;
@@ -153,15 +153,15 @@ void TransposeWx16_MSA(const uint8* src,
}
}
-void TransposeUVWx16_MSA(const uint8* src,
+void TransposeUVWx16_MSA(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width) {
int x;
- const uint8* s;
+ const uint8_t* s;
v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;
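
Editor's note: the 16-row C fallbacks above are just two Wx8 passes; the second pass is offset 8 rows down in the source and 8 columns right in the destination (offsets inferred from the calls visible in this hunk). A sketch:

void TransposeWx16_Sketch(const uint8_t* src, int src_stride,
                          uint8_t* dst, int dst_stride, int width) {
  TransposeWx8_C(src, src_stride, dst, dst_stride, width);
  TransposeWx8_C(src + 8 * src_stride, src_stride, dst + 8, dst_stride,
                 width);
}
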
diff --git a/chromium/third_party/libyuv/source/rotate_neon.cc b/chromium/third_party/libyuv/source/rotate_neon.cc
index 47ff9b29ef5..fdc0dd476c6 100644
--- a/chromium/third_party/libyuv/source/rotate_neon.cc
+++ b/chromium/third_party/libyuv/source/rotate_neon.cc
@@ -24,12 +24,12 @@ extern "C" {
static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
2, 6, 10, 14, 3, 7, 11, 15};
-void TransposeWx8_NEON(const uint8* src,
+void TransposeWx8_NEON(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
- const uint8* src_temp;
+ const uint8_t* src_temp;
asm volatile(
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
@@ -192,14 +192,14 @@ void TransposeWx8_NEON(const uint8* src,
static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11,
4, 12, 5, 13, 6, 14, 7, 15};
-void TransposeUVWx8_NEON(const uint8* src,
+void TransposeUVWx8_NEON(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width) {
- const uint8* src_temp;
+ const uint8_t* src_temp;
asm volatile(
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
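
Editor's note: the shuffle constant kVTbl4x4Transpose above encodes a 4x4 byte transpose as a single table lookup (vtbl on NEON). A scalar model showing why that index set is exactly a transpose:

#include <assert.h>
#include <stdint.h>

int main(void) {
  static const uint8_t tbl[16] = {0, 4, 8,  12, 1, 5, 9,  13,
                                  2, 6, 10, 14, 3, 7, 11, 15};
  uint8_t in[16], out[16];
  for (int i = 0; i < 16; ++i) in[i] = (uint8_t)i;  // row-major 4x4 tile
  for (int i = 0; i < 16; ++i) out[i] = in[tbl[i]]; // the table lookup
  for (int r = 0; r < 4; ++r)
    for (int c = 0; c < 4; ++c)
      assert(out[r * 4 + c] == in[c * 4 + r]);      // transposed
  return 0;
}
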
diff --git a/chromium/third_party/libyuv/source/rotate_neon64.cc b/chromium/third_party/libyuv/source/rotate_neon64.cc
index 93c30546bd2..f469baacf68 100644
--- a/chromium/third_party/libyuv/source/rotate_neon64.cc
+++ b/chromium/third_party/libyuv/source/rotate_neon64.cc
@@ -24,12 +24,12 @@ extern "C" {
static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
2, 6, 10, 14, 3, 7, 11, 15};
-void TransposeWx8_NEON(const uint8* src,
+void TransposeWx8_NEON(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
- const uint8* src_temp;
+ const uint8_t* src_temp;
asm volatile(
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
@@ -196,18 +196,18 @@ void TransposeWx8_NEON(const uint8* src,
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
}
-static const uint8 kVTbl4x4TransposeDi[32] = {
+static const uint8_t kVTbl4x4TransposeDi[32] = {
0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
-void TransposeUVWx8_NEON(const uint8* src,
+void TransposeUVWx8_NEON(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int width) {
- const uint8* src_temp;
+ const uint8_t* src_temp;
asm volatile(
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
diff --git a/chromium/third_party/libyuv/source/rotate_win.cc b/chromium/third_party/libyuv/source/rotate_win.cc
index fb052f65212..e887dd525c7 100644
--- a/chromium/third_party/libyuv/source/rotate_win.cc
+++ b/chromium/third_party/libyuv/source/rotate_win.cc
@@ -19,9 +19,9 @@ extern "C" {
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-__declspec(naked) void TransposeWx8_SSSE3(const uint8* src,
+__declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
int src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int width) {
__asm {
@@ -112,11 +112,11 @@ __declspec(naked) void TransposeWx8_SSSE3(const uint8* src,
}
}
-__declspec(naked) void TransposeUVWx8_SSE2(const uint8* src,
+__declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src,
int src_stride,
- uint8* dst_a,
+ uint8_t* dst_a,
int dst_stride_a,
- uint8* dst_b,
+ uint8_t* dst_b,
int dst_stride_b,
int w) {
__asm {
diff --git a/chromium/third_party/libyuv/source/row_any.cc b/chromium/third_party/libyuv/source/row_any.cc
index 7e557d42109..9343992b1e9 100644
--- a/chromium/third_party/libyuv/source/row_any.cc
+++ b/chromium/third_party/libyuv/source/row_any.cc
@@ -31,25 +31,25 @@ extern "C" {
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
// Any 4 planes to 1 with yuvconstants
-#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
- const uint8* a_buf, uint8* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 5]); \
- memset(temp, 0, 64 * 4); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 192, a_buf + n, r); \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
- yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
- SS(r, DUVSHIFT) * BPP); \
+#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 5]); \
+ memset(temp, 0, 64 * 4); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 192, a_buf + n, r); \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
+ yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
+ SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
@@ -67,22 +67,22 @@ ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#undef ANY41C
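
Editor's note: every ANY* wrapper in this file follows the same remainder pattern: run the SIMD kernel on the largest multiple of the block size, stage the leftover pixels in a zeroed scratch buffer, run one more full block there, and copy back only the valid bytes. A single-plane model of the pattern (RowFn, AnyRow, and the 1-byte-per-pixel layout are illustrative, not libyuv API; the upstream SIMD_ALIGNED attribute on temp is elided):

#include <stdint.h>
#include <string.h>

typedef void (*RowFn)(const uint8_t* src, uint8_t* dst, int width);

// mask = block_size - 1, e.g. 15 for a 16-pixel kernel.
void AnyRow(RowFn simd, const uint8_t* src, uint8_t* dst,
            int width, int mask) {
  uint8_t temp[128 * 2];
  memset(temp, 0, 128);        // zero the staging area (msan, tail bytes)
  int r = width & mask;        // leftover pixels
  int n = width & ~mask;       // whole blocks
  if (n > 0) {
    simd(src, dst, n);
  }
  memcpy(temp, src + n, r);            // stage the tail
  simd(temp, temp + 128, mask + 1);    // one full block over scratch
  memcpy(dst + n, temp + 128, r);      // keep only the r valid pixels
}
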
// Any 3 planes to 1.
-#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
- uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 4]); \
- memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
- } \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
- SS(r, DUVSHIFT) * BPP); \
+#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 4]); \
+ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
+ SS(r, DUVSHIFT) * BPP); \
}
// Merge functions.
@@ -96,6 +96,10 @@ ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
+#ifdef HAS_I422TOYUY2ROW_AVX2
+ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
+ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
+#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
@@ -120,10 +124,10 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
// on arm that subsamples 444 to 422 internally.
// Any 3 planes to 1 with yuvconstants
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
- uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
- int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 4]); \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 4]); \
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@@ -145,6 +149,12 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
+#ifdef HAS_I422TOAR30ROW_SSSE3
+ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I422TOAR30ROW_AVX2
+ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
+#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
@@ -194,48 +204,57 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#undef ANY31C
-// 64 byte per row for future AVX2
// Any 3 planes of 16 bit to 1 with yuvconstants
-// TODO(fbarchard): consider
-#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
- void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(T temp[16 * 3]); \
- SIMD_ALIGNED(uint8 out[64]); \
- memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, y_buf + n, r * SBPP); \
- memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+// TODO(fbarchard): consider sharing this code with ANY31C
+#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
+ void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
+ uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
+ int width) { \
+ SIMD_ALIGNED(T temp[16 * 3]); \
+ SIMD_ALIGNED(uint8_t out[64]); \
+ memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r * SBPP); \
+ memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
}
+#ifdef HAS_I210TOAR30ROW_SSSE3
+ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
+#endif
#ifdef HAS_I210TOARGBROW_SSSE3
-ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16, 2, 4, 7)
+ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
+#endif
+#ifdef HAS_I210TOARGBROW_AVX2
+ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
+#endif
+#ifdef HAS_I210TOAR30ROW_AVX2
+ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY31CT
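
Editor's note: for a concrete reading of the 16-bit variant, ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7) expands to roughly the following. UVSHIFT=1 halves the U/V offsets, SBPP=2 scales the copies to 16-bit samples, and MASK=7 gives an 8-pixel block:

void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf, const uint16_t* u_buf,
                             const uint16_t* v_buf, uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  SIMD_ALIGNED(uint16_t temp[16 * 3]);
  SIMD_ALIGNED(uint8_t out[64]);
  memset(temp, 0, 16 * 3 * 2);  /* for msan */
  int r = width & 7;            /* leftover pixels */
  int n = width & ~7;           /* multiple of 8 */
  if (n > 0) {
    I210ToARGBRow_SSSE3(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);
  }
  memcpy(temp, y_buf + n, r * 2);
  memcpy(temp + 16, u_buf + (n >> 1), SS(r, 1) * 2);  /* SS rounds up */
  memcpy(temp + 32, v_buf + (n >> 1), SS(r, 1) * 2);
  I210ToARGBRow_SSSE3(temp, temp + 16, temp + 32, out, yuvconstants, 8);
  memcpy(dst_ptr + n * 4, out, r * 4);
}
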
// Any 2 planes to 1.
-#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
- int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 3]); \
- memset(temp, 0, 64 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
- } \
- memcpy(temp, y_buf + n * SBPP, r * SBPP); \
- memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
- SS(r, UVSHIFT) * SBPP2); \
- ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
+ } \
+ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ SS(r, UVSHIFT) * SBPP2); \
+ ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
// Merge functions.
@@ -319,21 +338,21 @@ ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#undef ANY21
// Any 2 planes to 1 with yuvconstants
-#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 3]); \
- memset(temp, 0, 64 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, y_buf + n * SBPP, r * SBPP); \
- memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
- SS(r, UVSHIFT) * SBPP2); \
- ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ SS(r, UVSHIFT) * SBPP2); \
+ ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
// Biplanar to RGB.
@@ -377,8 +396,8 @@ ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[128 * 2]); \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 2]); \
memset(temp, 0, 128); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@@ -413,9 +432,15 @@ ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif
+#if defined(HAS_ABGRTOAR30ROW_SSSE3)
+ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
+#endif
#if defined(HAS_ARGBTOAR30ROW_SSSE3)
ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
+#if defined(HAS_ABGRTOAR30ROW_AVX2)
+ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
+#endif
#if defined(HAS_ARGBTOAR30ROW_AVX2)
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif
@@ -632,8 +657,8 @@ ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
// Any 1 to 1 blended. Destination is read, modify, write.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 2]); \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@@ -661,24 +686,24 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#undef ANY11B
// Any 1 to 1 with parameter.
-#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, T param, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, param, n); \
- } \
- memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
- ANY_SIMD(temp, temp + 64, param, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
+ memset(temp, 0, 64); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, param, n); \
+ } \
+ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(temp, temp + 64, param, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
}
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
ARGBToRGB565DitherRow_SSE2,
- const uint32,
+ const uint32_t,
4,
2,
3)
@@ -686,7 +711,7 @@ ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
ARGBToRGB565DitherRow_AVX2,
- const uint32,
+ const uint32_t,
4,
2,
7)
@@ -694,7 +719,7 @@ ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON,
ARGBToRGB565DitherRow_NEON,
- const uint32,
+ const uint32_t,
4,
2,
7)
@@ -702,22 +727,22 @@ ANY11P(ARGBToRGB565DitherRow_Any_NEON,
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
ANY11P(ARGBToRGB565DitherRow_Any_MSA,
ARGBToRGB565DitherRow_MSA,
- const uint32,
+ const uint32_t,
4,
2,
7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7)
+ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
-ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15)
+ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
-ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
+ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MSA
-ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8*, 4, 4, 7)
+ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
#undef ANY11P
@@ -742,34 +767,53 @@ ANY11C(Convert16To8Row_Any_SSSE3,
Convert16To8Row_SSSE3,
2,
1,
- uint16,
- uint8,
+ uint16_t,
+ uint8_t,
15)
#endif
#ifdef HAS_CONVERT16TO8ROW_AVX2
-ANY11C(Convert16To8Row_Any_AVX2, Convert16To8Row_AVX2, 2, 1, uint16, uint8, 31)
+ANY11C(Convert16To8Row_Any_AVX2,
+ Convert16To8Row_AVX2,
+ 2,
+ 1,
+ uint16_t,
+ uint8_t,
+ 31)
#endif
#ifdef HAS_CONVERT8TO16ROW_SSE2
-ANY11C(Convert8To16Row_Any_SSE2, Convert8To16Row_SSE2, 1, 2, uint8, uint16, 15)
+ANY11C(Convert8To16Row_Any_SSE2,
+ Convert8To16Row_SSE2,
+ 1,
+ 2,
+ uint8_t,
+ uint16_t,
+ 15)
#endif
#ifdef HAS_CONVERT8TO16ROW_AVX2
-ANY11C(Convert8To16Row_Any_AVX2, Convert8To16Row_AVX2, 1, 2, uint8, uint16, 31)
+ANY11C(Convert8To16Row_Any_AVX2,
+ Convert8To16Row_AVX2,
+ 1,
+ 2,
+ uint8_t,
+ uint16_t,
+ 31)
#endif
#undef ANY11C
// Any 1 to 1 with parameter, operating on shorts. SBPP and BPP are
// measured in shorts.
-#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
- void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, T param, int width) { \
- SIMD_ALIGNED(uint16 temp[32 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, param, n); \
- } \
- memcpy(temp, src_ptr + n, r * SBPP); \
- ANY_SIMD(temp, temp + 16, param, MASK + 1); \
- memcpy(dst_ptr + n, temp + 16, r * BPP); \
+#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
+ void NAMEANY(const uint16_t* src_ptr, uint16_t* dst_ptr, T param, \
+ int width) { \
+ SIMD_ALIGNED(uint16_t temp[32 * 2]); \
+ memset(temp, 0, 64); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, param, n); \
+ } \
+ memcpy(temp, src_ptr + n, r * SBPP); \
+ ANY_SIMD(temp, temp + 16, param, MASK + 1); \
+ memcpy(dst_ptr + n, temp + 16, r * BPP); \
}
#ifdef HAS_HALFFLOATROW_SSE2
@@ -793,9 +837,9 @@ ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, float, 2, 2, 31)
// Any 1 to 1 with yuvconstants
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8 temp[128 * 2]); \
+ SIMD_ALIGNED(uint8_t temp[128 * 2]); \
memset(temp, 0, 128); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@@ -825,20 +869,20 @@ ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#undef ANY11C
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
-#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
- void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, \
- int width, int source_y_fraction) { \
- SIMD_ALIGNED(uint8 temp[64 * 3]); \
- memset(temp, 0, 64 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
- } \
- memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
- memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
- ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
+ void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
+ ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
+ } \
+ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
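
Editor's note: the interpolators dispatched here blend two vertically adjacent rows by source_y_fraction/256. A scalar model (the +128 rounding is illustrative; the exact SIMD rounding may differ slightly):

void InterpolateRow_Sketch(uint8_t* dst, const uint8_t* src,
                           ptrdiff_t src_stride, int width, int fraction) {
  const uint8_t* src1 = src + src_stride;  // the row below
  int f1 = fraction;                       // weight of the lower row
  int f0 = 256 - fraction;                 // weight of the upper row
  for (int x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src[x] * f0 + src1[x] * f1 + 128) >> 8);
  }
}
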
#ifdef HAS_INTERPOLATEROW_AVX2
@@ -857,8 +901,8 @@ ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
// Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 2]); \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
memset(temp, 0, 64); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@@ -897,46 +941,47 @@ ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#undef ANY11M
// Any 1 plane. (memset)
-#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
- void NAMEANY(uint8* dst_ptr, T v32, int width) { \
- SIMD_ALIGNED(uint8 temp[64]); \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(dst_ptr, v32, n); \
- } \
- ANY_SIMD(temp, v32, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp, r * BPP); \
+#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64]); \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(dst_ptr, v32, n); \
+ } \
+ ANY_SIMD(temp, v32, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp, r * BPP); \
}
#ifdef HAS_SETROW_X86
-ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3)
+ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
#endif
#ifdef HAS_SETROW_NEON
-ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15)
+ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
#endif
#ifdef HAS_ARGBSETROW_NEON
-ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
+ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
#endif
#ifdef HAS_ARGBSETROW_MSA
-ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32, 4, 3)
+ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#endif
#undef ANY1
// Any 1 to 2. Outputs UV planes.
-#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) { \
- SIMD_ALIGNED(uint8 temp[128 * 3]); \
- memset(temp, 0, 128); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_u, dst_v, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
- memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
- memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
+#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 3]); \
+ memset(temp, 0, 128); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_u, dst_v, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
+ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
+ memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
}
#ifdef HAS_SPLITUVROW_SSE2
@@ -975,21 +1020,21 @@ ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#undef ANY12
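
Editor's note: the SplitUV kernels wrapped above de-interleave a packed UV row (as in NV12/NV21) into separate U and V planes; a scalar model:

void SplitUVRow_Sketch(const uint8_t* src_uv, uint8_t* dst_u,
                       uint8_t* dst_v, int width) {
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[2 * x + 0];  // even bytes -> U plane
    dst_v[x] = src_uv[2 * x + 1];  // odd bytes  -> V plane
  }
}
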
// Any 1 to 3. Outputs RGB planes.
-#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_r, uint8* dst_g, uint8* dst_b, \
- int width) { \
- SIMD_ALIGNED(uint8 temp[16 * 6]); \
- memset(temp, 0, 16 * 3); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
- } \
- memcpy(temp, src_ptr + n * BPP, r * BPP); \
- ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \
- memcpy(dst_r + n, temp + 16 * 3, r); \
- memcpy(dst_g + n, temp + 16 * 4, r); \
- memcpy(dst_b + n, temp + 16 * 5, r); \
+#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
+ uint8_t* dst_b, int width) { \
+ SIMD_ALIGNED(uint8_t temp[16 * 6]); \
+ memset(temp, 0, 16 * 3); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
+ } \
+ memcpy(temp, src_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \
+ memcpy(dst_r + n, temp + 16 * 3, r); \
+ memcpy(dst_g + n, temp + 16 * 4, r); \
+ memcpy(dst_b + n, temp + 16 * 5, r); \
}
#ifdef HAS_SPLITRGBROW_SSSE3
@@ -1002,9 +1047,9 @@ ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 AVX ARGB pixels.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, int src_stride_ptr, uint8* dst_u, \
- uint8* dst_v, int width) { \
- SIMD_ALIGNED(uint8 temp[128 * 4]); \
+ void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \
+ uint8_t* dst_v, int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 4]); \
memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
diff --git a/chromium/third_party/libyuv/source/row_common.cc b/chromium/third_party/libyuv/source/row_common.cc
index a0ca90b8ab8..297d87e01db 100644
--- a/chromium/third_party/libyuv/source/row_common.cc
+++ b/chromium/third_party/libyuv/source/row_common.cc
@@ -10,6 +10,7 @@
#include "libyuv/row.h"
+#include <stdio.h>
#include <string.h> // For memcpy and memset.
#include "libyuv/basic_types.h"
@@ -23,59 +24,69 @@ extern "C" {
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
+static __inline int32_t clamp0(int32_t v) {
return ((-(v) >> 31) & (v));
}
-static __inline int32 clamp255(int32 v) {
+static __inline int32_t clamp255(int32_t v) {
return (((255 - (v)) >> 31) | (v)) & 255;
}
-static __inline uint32 Clamp(int32 val) {
- int v = clamp0(val);
- return (uint32)(clamp255(v));
+static __inline int32_t clamp1023(int32_t v) {
+ return (((1023 - (v)) >> 31) | (v)) & 1023;
}
-static __inline uint32 Abs(int32 v) {
+static __inline uint32_t Abs(int32_t v) {
int m = v >> 31;
return (v + m) ^ m;
}
#else // USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
+static __inline int32_t clamp0(int32_t v) {
return (v < 0) ? 0 : v;
}
-static __inline int32 clamp255(int32 v) {
+static __inline int32_t clamp255(int32_t v) {
return (v > 255) ? 255 : v;
}
-static __inline uint32 Clamp(int32 val) {
- int v = clamp0(val);
- return (uint32)(clamp255(v));
+static __inline int32_t clamp1023(int32_t v) {
+ return (v > 1023) ? 1023 : v;
}
-static __inline uint32 Abs(int32 v) {
+static __inline uint32_t Abs(int32_t v) {
return (v < 0) ? -v : v;
}
#endif // USE_BRANCHLESS
+static __inline uint32_t Clamp(int32_t val) {
+ int v = clamp0(val);
+ return (uint32_t)(clamp255(v));
+}
+
+static __inline uint32_t Clamp10(int32_t val) {
+ int v = clamp0(val);
+ return (uint32_t)(clamp1023(v));
+}
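
Editor's note: the branchless forms rely on arithmetic right shift of a negative int32_t producing all ones (what every compiler libyuv targets does, though it is implementation-defined in C). Note clamp255 assumes its input is already non-negative, which is why Clamp() applies clamp0 first. A quick check restating the two basic clamps with local names:

#include <assert.h>
#include <stdint.h>

static int32_t Clamp0_(int32_t v)   { return ((-(v)) >> 31) & v; }
static int32_t Clamp255_(int32_t v) { return (((255 - v) >> 31) | v) & 255; }

int main(void) {
  // For v > 0, -v >> 31 is all ones, so the AND passes v through;
  // for v < 0 the mask is zero and the result is 0.
  assert(Clamp0_(-7) == 0 && Clamp0_(42) == 42);
  // For v > 255, 255 - v is negative, the shift yields -1, and the
  // final AND forces 255; otherwise v passes through unchanged.
  assert(Clamp255_(300) == 255 && Clamp255_(128) == 128);
  return 0;
}
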
-#ifdef LIBYUV_LITTLE_ENDIAN
-#define WRITEWORD(p, v) *(uint32*)(p) = v
+// Little Endian
+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
+ defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
+ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
-static inline void WRITEWORD(uint8* p, uint32 v) {
- p[0] = (uint8)(v & 255);
- p[1] = (uint8)((v >> 8) & 255);
- p[2] = (uint8)((v >> 16) & 255);
- p[3] = (uint8)((v >> 24) & 255);
+static inline void WRITEWORD(uint8_t* p, uint32_t v) {
+ p[0] = (uint8_t)(v & 255);
+ p[1] = (uint8_t)((v >> 8) & 255);
+ p[2] = (uint8_t)((v >> 16) & 255);
+ p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
+void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_rgb24[0];
- uint8 g = src_rgb24[1];
- uint8 r = src_rgb24[2];
+ uint8_t b = src_rgb24[0];
+ uint8_t g = src_rgb24[1];
+ uint8_t r = src_rgb24[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
@@ -85,12 +96,12 @@ void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
}
}
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
+void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 r = src_raw[0];
- uint8 g = src_raw[1];
- uint8 b = src_raw[2];
+ uint8_t r = src_raw[0];
+ uint8_t g = src_raw[1];
+ uint8_t b = src_raw[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
@@ -100,12 +111,12 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
}
}
-void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) {
+void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 r = src_raw[0];
- uint8 g = src_raw[1];
- uint8 b = src_raw[2];
+ uint8_t r = src_raw[0];
+ uint8_t g = src_raw[1];
+ uint8_t b = src_raw[2];
dst_rgb24[0] = b;
dst_rgb24[1] = g;
dst_rgb24[2] = r;
@@ -114,12 +125,14 @@ void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) {
}
}
-void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
+void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_rgb565[0] & 0x1f;
- uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r = src_rgb565[1] >> 3;
+ uint8_t b = src_rgb565[0] & 0x1f;
+ uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r = src_rgb565[1] >> 3;
dst_argb[0] = (b << 3) | (b >> 2);
dst_argb[1] = (g << 2) | (g >> 4);
dst_argb[2] = (r << 3) | (r >> 2);
@@ -129,15 +142,15 @@ void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
}
}
-void ARGB1555ToARGBRow_C(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_argb1555[0] & 0x1f;
- uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r = (src_argb1555[1] & 0x7c) >> 2;
- uint8 a = src_argb1555[1] >> 7;
+ uint8_t b = src_argb1555[0] & 0x1f;
+ uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
+ uint8_t a = src_argb1555[1] >> 7;
dst_argb[0] = (b << 3) | (b >> 2);
dst_argb[1] = (g << 3) | (g >> 2);
dst_argb[2] = (r << 3) | (r >> 2);
@@ -147,15 +160,15 @@ void ARGB1555ToARGBRow_C(const uint8* src_argb1555,
}
}
-void ARGB4444ToARGBRow_C(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_argb4444[0] & 0x0f;
- uint8 g = src_argb4444[0] >> 4;
- uint8 r = src_argb4444[1] & 0x0f;
- uint8 a = src_argb4444[1] >> 4;
+ uint8_t b = src_argb4444[0] & 0x0f;
+ uint8_t g = src_argb4444[0] >> 4;
+ uint8_t r = src_argb4444[1] & 0x0f;
+ uint8_t a = src_argb4444[1] >> 4;
dst_argb[0] = (b << 4) | b;
dst_argb[1] = (g << 4) | g;
dst_argb[2] = (r << 4) | r;
@@ -165,14 +178,14 @@ void ARGB4444ToARGBRow_C(const uint8* src_argb4444,
}
}
-void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width) {
+void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint32 ar30 = *(uint32*)src_ar30;
- uint32 b = ar30 & 0x3ff;
- uint32 g = (ar30 >> 10) & 0x3ff;
- uint32 r = (ar30 >> 20) & 0x3ff;
- uint32 a = (ar30 >> 30) & 0x3;
+ uint32_t ar30 = *(uint32_t*)src_ar30;
+ uint32_t b = ar30 & 0x3ff;
+ uint32_t g = (ar30 >> 10) & 0x3ff;
+ uint32_t r = (ar30 >> 20) & 0x3ff;
+ uint32_t a = (ar30 >> 30) & 0x3;
dst_argb[0] = b >> 2;
dst_argb[1] = g >> 2;
dst_argb[2] = r >> 2;
@@ -182,12 +195,29 @@ void AR30ToARGBRow_C(const uint8* src_ar30, uint8* dst_argb, int width) {
}
}
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_argb[0];
- uint8 g = src_argb[1];
- uint8 r = src_argb[2];
+ uint32_t ar30 = *(uint32_t*)src_ar30;
+ uint32_t b = ar30 & 0x3ff;
+ uint32_t g = (ar30 >> 10) & 0x3ff;
+ uint32_t r = (ar30 >> 20) & 0x3ff;
+ uint32_t a = (ar30 >> 30) & 0x3;
+ dst_abgr[0] = r >> 2;
+ dst_abgr[1] = g >> 2;
+ dst_abgr[2] = b >> 2;
+ dst_abgr[3] = a * 0x55;
+ dst_abgr += 4;
+ src_ar30 += 4;
+ }
+}
+
+void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
dst_rgb[0] = b;
dst_rgb[1] = g;
dst_rgb[2] = r;
@@ -196,12 +226,12 @@ void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_argb[0];
- uint8 g = src_argb[1];
- uint8 r = src_argb[2];
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
dst_rgb[0] = r;
dst_rgb[1] = g;
dst_rgb[2] = b;
@@ -210,25 +240,25 @@ void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 2;
- uint8 r0 = src_argb[2] >> 3;
- uint8 b1 = src_argb[4] >> 3;
- uint8 g1 = src_argb[5] >> 2;
- uint8 r1 = src_argb[6] >> 3;
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 2;
+ uint8_t r0 = src_argb[2] >> 3;
+ uint8_t b1 = src_argb[4] >> 3;
+ uint8_t g1 = src_argb[5] >> 2;
+ uint8_t r1 = src_argb[6] >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 2;
- uint8 r0 = src_argb[2] >> 3;
- *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 2;
+ uint8_t r0 = src_argb[2] >> 3;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
}
}
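
Editor's note: a worked check of the 565 packing above; a near-white pixel (B=0xF8, G=0xFC, R=0xF8) quantizes to the full-scale RGB565 word:

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint8_t b = 0xF8 >> 3;  // 0x1F, 5 bits
  uint8_t g = 0xFC >> 2;  // 0x3F, 6 bits
  uint8_t r = 0xF8 >> 3;  // 0x1F, 5 bits
  uint16_t rgb565 = (uint16_t)(b | (g << 5) | (r << 11));
  assert(rgb565 == 0xFFFF);
  return 0;
}
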
@@ -240,20 +270,20 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
// endian will not affect order of the original matrix. But the dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
-void ARGBToRGB565DitherRow_C(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
int dither0 = ((const unsigned char*)(&dither4))[x & 3];
int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
- uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
- uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
- uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
- uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
+ uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
+ uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
+ uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
+ uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
+ uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
+ uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
dst_rgb += 4;
@@ -261,125 +291,138 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb,
}
if (width & 1) {
int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
- uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
- *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
+ uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
+ uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
}
}
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 3;
- uint8 r0 = src_argb[2] >> 3;
- uint8 a0 = src_argb[3] >> 7;
- uint8 b1 = src_argb[4] >> 3;
- uint8 g1 = src_argb[5] >> 3;
- uint8 r1 = src_argb[6] >> 3;
- uint8 a1 = src_argb[7] >> 7;
- *(uint32*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
- (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 3;
+ uint8_t r0 = src_argb[2] >> 3;
+ uint8_t a0 = src_argb[3] >> 7;
+ uint8_t b1 = src_argb[4] >> 3;
+ uint8_t g1 = src_argb[5] >> 3;
+ uint8_t r1 = src_argb[6] >> 3;
+ uint8_t a1 = src_argb[7] >> 7;
+ *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
+ (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 3;
- uint8 r0 = src_argb[2] >> 3;
- uint8 a0 = src_argb[3] >> 7;
- *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 3;
+ uint8_t r0 = src_argb[2] >> 3;
+ uint8_t a0 = src_argb[3] >> 7;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
}
}
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb[0] >> 4;
- uint8 g0 = src_argb[1] >> 4;
- uint8 r0 = src_argb[2] >> 4;
- uint8 a0 = src_argb[3] >> 4;
- uint8 b1 = src_argb[4] >> 4;
- uint8 g1 = src_argb[5] >> 4;
- uint8 r1 = src_argb[6] >> 4;
- uint8 a1 = src_argb[7] >> 4;
- *(uint32*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | (b1 << 16) |
- (g1 << 20) | (r1 << 24) | (a1 << 28);
+ uint8_t b0 = src_argb[0] >> 4;
+ uint8_t g0 = src_argb[1] >> 4;
+ uint8_t r0 = src_argb[2] >> 4;
+ uint8_t a0 = src_argb[3] >> 4;
+ uint8_t b1 = src_argb[4] >> 4;
+ uint8_t g1 = src_argb[5] >> 4;
+ uint8_t r1 = src_argb[6] >> 4;
+ uint8_t a1 = src_argb[7] >> 4;
+ *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
+ (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
- uint8 b0 = src_argb[0] >> 4;
- uint8 g0 = src_argb[1] >> 4;
- uint8 r0 = src_argb[2] >> 4;
- uint8 a0 = src_argb[3] >> 4;
- *(uint16*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
+ uint8_t b0 = src_argb[0] >> 4;
+ uint8_t g0 = src_argb[1] >> 4;
+ uint8_t r0 = src_argb[2] >> 4;
+ uint8_t a0 = src_argb[3] >> 4;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
}
}
-void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint32 b0 = (src_argb[0] >> 6) | ((uint32)(src_argb[0]) << 2);
- uint32 g0 = (src_argb[1] >> 6) | ((uint32)(src_argb[1]) << 2);
- uint32 r0 = (src_argb[2] >> 6) | ((uint32)(src_argb[2]) << 2);
- uint32 a0 = (src_argb[3] >> 6);
- *(uint32*)(dst_rgb) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
- dst_rgb += 4;
+ uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
+ uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
+ uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
+ uint32_t a0 = (src_abgr[3] >> 6);
+ *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
+ dst_ar30 += 4;
+ src_abgr += 4;
+ }
+}
+
+void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
+ uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
+ uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
+ uint32_t a0 = (src_argb[3] >> 6);
+ *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
+ dst_ar30 += 4;
src_argb += 4;
}
}
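
Editor's note: a worked check of the 8-to-10-bit replication above. Each channel is widened with (v << 2) | (v >> 6), so 0xFF maps to full-scale 0x3FF, then the channels pack as B | G<<10 | R<<20 | A<<30 (sample values illustrative):

#include <assert.h>
#include <stdint.h>

static uint32_t Expand8To10(uint8_t v) {
  return ((uint32_t)v << 2) | (v >> 6);
}

int main(void) {
  uint8_t b = 0xFF, g = 0x80, r = 0x00, a = 0xC0;
  // b -> 0x3FF, g -> 0x202, r -> 0x000, a >> 6 -> 3.
  uint32_t ar30 = Expand8To10(b) | (Expand8To10(g) << 10) |
                  (Expand8To10(r) << 20) | ((uint32_t)(a >> 6) << 30);
  assert(ar30 == 0xC0080BFFu);
  return 0;
}
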
-static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
+static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}
-static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
+static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
-static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
+static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}
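
Editor's note: these are the BT.601 studio-swing coefficients in 8.8 fixed point. The 0x1080 bias is (16 << 8) + 128, the studio-swing offset plus rounding, and 0x8080 centers chroma at 128 with rounding. A quick check:

#include <assert.h>
#include <stdint.h>

static int RGBToY_(uint8_t r, uint8_t g, uint8_t b) {
  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}
static int RGBToU_(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}

int main(void) {
  assert(RGBToY_(0, 0, 0) == 16);          // black -> luma 16
  assert(RGBToY_(255, 255, 255) == 235);   // white -> luma 235
  assert(RGBToU_(128, 128, 128) == 128);   // gray -> neutral chroma
  return 0;
}
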
// ARGBToY_C and ARGBToUV_C
-#define MAKEROWY(NAME, R, G, B, BPP) \
- void NAME##ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
- } \
- void NAME##ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
- src_rgb1[B + BPP]) >> \
- 2; \
- uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
- src_rgb1[G + BPP]) >> \
- 2; \
- uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
- src_rgb1[R + BPP]) >> \
- 2; \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
- uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
- uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- } \
+#define MAKEROWY(NAME, R, G, B, BPP) \
+ void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
+ src_argb0 += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
+ src_rgb1[B + BPP]) >> \
+ 2; \
+ uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
+ src_rgb1[G + BPP]) >> \
+ 2; \
+ uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
+ src_rgb1[R + BPP]) >> \
+ 2; \
+ dst_u[0] = RGBToU(ar, ag, ab); \
+ dst_v[0] = RGBToV(ar, ag, ab); \
+ src_rgb0 += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
+ uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
+ uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
+ dst_u[0] = RGBToU(ar, ag, ab); \
+ dst_v[0] = RGBToV(ar, ag, ab); \
+ } \
}
MAKEROWY(ARGB, 2, 1, 0, 4)
@@ -415,65 +458,65 @@ MAKEROWY(RAW, 0, 1, 2, 3)
// g -0.41869 * 255 = -106.76595 = -107
// r 0.50000 * 255 = 127.5 = 127
-static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
+static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
-static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
+static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
-static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
+static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
// ARGBToYJ_C and ARGBToUVJ_C
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
- void NAME##ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
- } \
- void NAME##ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
- AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
- uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
- AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
- uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
- AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
- uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
- uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- } \
+#define MAKEROWYJ(NAME, R, G, B, BPP) \
+ void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
+ src_argb0 += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
+ AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
+ uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
+ AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
+ uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
+ AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
+ dst_u[0] = RGBToUJ(ar, ag, ab); \
+ dst_v[0] = RGBToVJ(ar, ag, ab); \
+ src_rgb0 += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
+ uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
+ uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
+ dst_u[0] = RGBToUJ(ar, ag, ab); \
+ dst_v[0] = RGBToVJ(ar, ag, ab); \
+ } \
}
MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
+void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_rgb565[0] & 0x1f;
- uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r = src_rgb565[1] >> 3;
+ uint8_t b = src_rgb565[0] & 0x1f;
+ uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r = src_rgb565[1] >> 3;
b = (b << 3) | (b >> 2);
g = (g << 2) | (g >> 4);
r = (r << 3) | (r >> 2);
@@ -483,12 +526,12 @@ void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
}
}
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
+void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_argb1555[0] & 0x1f;
- uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r = (src_argb1555[1] & 0x7c) >> 2;
+ uint8_t b = src_argb1555[0] & 0x1f;
+ uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
b = (b << 3) | (b >> 2);
g = (g << 3) | (g >> 2);
r = (r << 3) | (r >> 2);
@@ -498,12 +541,12 @@ void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
}
}
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
+void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 b = src_argb4444[0] & 0x0f;
- uint8 g = src_argb4444[0] >> 4;
- uint8 r = src_argb4444[1] & 0x0f;
+ uint8_t b = src_argb4444[0] & 0x0f;
+ uint8_t g = src_argb4444[0] >> 4;
+ uint8_t r = src_argb4444[1] & 0x0f;
b = (b << 4) | b;
g = (g << 4) | g;
r = (r << 4) | r;
@@ -513,29 +556,29 @@ void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
}
}
-void RGB565ToUVRow_C(const uint8* src_rgb565,
+void RGB565ToUVRow_C(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
+ const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_rgb565[0] & 0x1f;
- uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r0 = src_rgb565[1] >> 3;
- uint8 b1 = src_rgb565[2] & 0x1f;
- uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
- uint8 r1 = src_rgb565[3] >> 3;
- uint8 b2 = next_rgb565[0] & 0x1f;
- uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8 r2 = next_rgb565[1] >> 3;
- uint8 b3 = next_rgb565[2] & 0x1f;
- uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
- uint8 r3 = next_rgb565[3] >> 3;
- uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
+ uint8_t b0 = src_rgb565[0] & 0x1f;
+ uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r0 = src_rgb565[1] >> 3;
+ uint8_t b1 = src_rgb565[2] & 0x1f;
+ uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
+ uint8_t r1 = src_rgb565[3] >> 3;
+ uint8_t b2 = next_rgb565[0] & 0x1f;
+ uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
+ uint8_t r2 = next_rgb565[1] >> 3;
+ uint8_t b3 = next_rgb565[2] & 0x1f;
+ uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
+ uint8_t r3 = next_rgb565[3] >> 3;
+ uint8_t b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
+ uint8_t g = (g0 + g1 + g2 + g3);
+ uint8_t r = (r0 + r1 + r2 + r3);
b = (b << 1) | (b >> 6); // 787 -> 888.
r = (r << 1) | (r >> 6);
dst_u[0] = RGBToU(r, g, b);
@@ -546,15 +589,15 @@ void RGB565ToUVRow_C(const uint8* src_rgb565,
dst_v += 1;
}
if (width & 1) {
- uint8 b0 = src_rgb565[0] & 0x1f;
- uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r0 = src_rgb565[1] >> 3;
- uint8 b2 = next_rgb565[0] & 0x1f;
- uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8 r2 = next_rgb565[1] >> 3;
- uint8 b = (b0 + b2); // 565 * 2 = 676.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
+ uint8_t b0 = src_rgb565[0] & 0x1f;
+ uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r0 = src_rgb565[1] >> 3;
+ uint8_t b2 = next_rgb565[0] & 0x1f;
+ uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
+ uint8_t r2 = next_rgb565[1] >> 3;
+ uint8_t b = (b0 + b2); // 565 * 2 = 676.
+ uint8_t g = (g0 + g2);
+ uint8_t r = (r0 + r2);
b = (b << 2) | (b >> 4); // 676 -> 888
g = (g << 1) | (g >> 6);
r = (r << 2) | (r >> 4);
@@ -563,29 +606,29 @@ void RGB565ToUVRow_C(const uint8* src_rgb565,
}
}
-void ARGB1555ToUVRow_C(const uint8* src_argb1555,
+void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
+ const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb1555[0] & 0x1f;
- uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8 b1 = src_argb1555[2] & 0x1f;
- uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
- uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
- uint8 b2 = next_argb1555[0] & 0x1f;
- uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
- uint8 b3 = next_argb1555[2] & 0x1f;
- uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
- uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
- uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
+ uint8_t b0 = src_argb1555[0] & 0x1f;
+ uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
+ uint8_t b1 = src_argb1555[2] & 0x1f;
+ uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
+ uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
+ uint8_t b2 = next_argb1555[0] & 0x1f;
+ uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
+ uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
+ uint8_t b3 = next_argb1555[2] & 0x1f;
+ uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
+ uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
+ uint8_t b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
+ uint8_t g = (g0 + g1 + g2 + g3);
+ uint8_t r = (r0 + r1 + r2 + r3);
b = (b << 1) | (b >> 6); // 777 -> 888.
g = (g << 1) | (g >> 6);
r = (r << 1) | (r >> 6);
@@ -597,15 +640,15 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555,
dst_v += 1;
}
if (width & 1) {
- uint8 b0 = src_argb1555[0] & 0x1f;
- uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8 b2 = next_argb1555[0] & 0x1f;
- uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8 r2 = next_argb1555[1] >> 3;
- uint8 b = (b0 + b2); // 555 * 2 = 666.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
+ uint8_t b0 = src_argb1555[0] & 0x1f;
+ uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
+ uint8_t b2 = next_argb1555[0] & 0x1f;
+ uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
+ uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;  // mask out the alpha bit.
+ uint8_t b = (b0 + b2); // 555 * 2 = 666.
+ uint8_t g = (g0 + g2);
+ uint8_t r = (r0 + r2);
b = (b << 2) | (b >> 4); // 666 -> 888.
g = (g << 2) | (g >> 4);
r = (r << 2) | (r >> 4);
@@ -614,29 +657,29 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555,
}
}
-void ARGB4444ToUVRow_C(const uint8* src_argb4444,
+void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
int src_stride_argb4444,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
+ const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb4444[0] & 0x0f;
- uint8 g0 = src_argb4444[0] >> 4;
- uint8 r0 = src_argb4444[1] & 0x0f;
- uint8 b1 = src_argb4444[2] & 0x0f;
- uint8 g1 = src_argb4444[2] >> 4;
- uint8 r1 = src_argb4444[3] & 0x0f;
- uint8 b2 = next_argb4444[0] & 0x0f;
- uint8 g2 = next_argb4444[0] >> 4;
- uint8 r2 = next_argb4444[1] & 0x0f;
- uint8 b3 = next_argb4444[2] & 0x0f;
- uint8 g3 = next_argb4444[2] >> 4;
- uint8 r3 = next_argb4444[3] & 0x0f;
- uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
+ uint8_t b0 = src_argb4444[0] & 0x0f;
+ uint8_t g0 = src_argb4444[0] >> 4;
+ uint8_t r0 = src_argb4444[1] & 0x0f;
+ uint8_t b1 = src_argb4444[2] & 0x0f;
+ uint8_t g1 = src_argb4444[2] >> 4;
+ uint8_t r1 = src_argb4444[3] & 0x0f;
+ uint8_t b2 = next_argb4444[0] & 0x0f;
+ uint8_t g2 = next_argb4444[0] >> 4;
+ uint8_t r2 = next_argb4444[1] & 0x0f;
+ uint8_t b3 = next_argb4444[2] & 0x0f;
+ uint8_t g3 = next_argb4444[2] >> 4;
+ uint8_t r3 = next_argb4444[3] & 0x0f;
+ uint8_t b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
+ uint8_t g = (g0 + g1 + g2 + g3);
+ uint8_t r = (r0 + r1 + r2 + r3);
b = (b << 2) | (b >> 4); // 666 -> 888.
g = (g << 2) | (g >> 4);
r = (r << 2) | (r >> 4);
@@ -648,15 +691,15 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444,
dst_v += 1;
}
if (width & 1) {
- uint8 b0 = src_argb4444[0] & 0x0f;
- uint8 g0 = src_argb4444[0] >> 4;
- uint8 r0 = src_argb4444[1] & 0x0f;
- uint8 b2 = next_argb4444[0] & 0x0f;
- uint8 g2 = next_argb4444[0] >> 4;
- uint8 r2 = next_argb4444[1] & 0x0f;
- uint8 b = (b0 + b2); // 444 * 2 = 555.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
+ uint8_t b0 = src_argb4444[0] & 0x0f;
+ uint8_t g0 = src_argb4444[0] >> 4;
+ uint8_t r0 = src_argb4444[1] & 0x0f;
+ uint8_t b2 = next_argb4444[0] & 0x0f;
+ uint8_t g2 = next_argb4444[0] >> 4;
+ uint8_t r2 = next_argb4444[1] & 0x0f;
+ uint8_t b = (b0 + b2); // 444 * 2 = 555.
+ uint8_t g = (g0 + g2);
+ uint8_t r = (r0 + r2);
b = (b << 3) | (b >> 2); // 555 -> 888.
g = (g << 3) | (g >> 2);
r = (r << 3) | (r >> 2);
@@ -665,15 +708,15 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444,
}
}
-void ARGBToUV444Row_C(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_C(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 ab = src_argb[0];
- uint8 ag = src_argb[1];
- uint8 ar = src_argb[2];
+ uint8_t ab = src_argb[0];
+ uint8_t ag = src_argb[1];
+ uint8_t ar = src_argb[2];
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
src_argb += 4;
@@ -682,10 +725,10 @@ void ARGBToUV444Row_C(const uint8* src_argb,
}
}
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
+ uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
dst_argb[3] = src_argb[3];
dst_argb += 4;
@@ -694,7 +737,7 @@ void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
}
// Convert a row of image to Sepia tone.
-void ARGBSepiaRow_C(uint8* dst_argb, int width) {
+void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
int b = dst_argb[0];
@@ -713,9 +756,9 @@ void ARGBSepiaRow_C(uint8* dst_argb, int width) {
// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
-void ARGBColorMatrixRow_C(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -745,7 +788,9 @@ void ARGBColorMatrixRow_C(const uint8* src_argb,
}
// Apply color table to a row of image.
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
+void ARGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
int x;
for (x = 0; x < width; ++x) {
int b = dst_argb[0];
@@ -761,7 +806,9 @@ void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
}
// Apply color table to a row of image.
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
+void RGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
int x;
for (x = 0; x < width; ++x) {
int b = dst_argb[0];
@@ -774,7 +821,7 @@ void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
}
}
-void ARGBQuantizeRow_C(uint8* dst_argb,
+void ARGBQuantizeRow_C(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
@@ -794,21 +841,21 @@ void ARGBQuantizeRow_C(uint8* dst_argb,
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 24
-void ARGBShadeRow_C(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBShadeRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value) {
- const uint32 b_scale = REPEAT8(value & 0xff);
- const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
- const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
- const uint32 a_scale = REPEAT8(value >> 24);
+ uint32_t value) {
+ const uint32_t b_scale = REPEAT8(value & 0xff);
+ const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
+ const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
+ const uint32_t a_scale = REPEAT8(value >> 24);
int i;
for (i = 0; i < width; ++i) {
- const uint32 b = REPEAT8(src_argb[0]);
- const uint32 g = REPEAT8(src_argb[1]);
- const uint32 r = REPEAT8(src_argb[2]);
- const uint32 a = REPEAT8(src_argb[3]);
+ const uint32_t b = REPEAT8(src_argb[0]);
+ const uint32_t g = REPEAT8(src_argb[1]);
+ const uint32_t r = REPEAT8(src_argb[2]);
+ const uint32_t a = REPEAT8(src_argb[3]);
dst_argb[0] = SHADE(b, b_scale);
dst_argb[1] = SHADE(g, g_scale);
dst_argb[2] = SHADE(r, r_scale);
@@ -823,20 +870,20 @@ void ARGBShadeRow_C(const uint8* src_argb,
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16
-void ARGBMultiplyRow_C(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
- const uint32 b = REPEAT8(src_argb0[0]);
- const uint32 g = REPEAT8(src_argb0[1]);
- const uint32 r = REPEAT8(src_argb0[2]);
- const uint32 a = REPEAT8(src_argb0[3]);
- const uint32 b_scale = src_argb1[0];
- const uint32 g_scale = src_argb1[1];
- const uint32 r_scale = src_argb1[2];
- const uint32 a_scale = src_argb1[3];
+ const uint32_t b = REPEAT8(src_argb0[0]);
+ const uint32_t g = REPEAT8(src_argb0[1]);
+ const uint32_t r = REPEAT8(src_argb0[2]);
+ const uint32_t a = REPEAT8(src_argb0[3]);
+ const uint32_t b_scale = src_argb1[0];
+ const uint32_t g_scale = src_argb1[1];
+ const uint32_t r_scale = src_argb1[2];
+ const uint32_t a_scale = src_argb1[3];
dst_argb[0] = SHADE(b, b_scale);
dst_argb[1] = SHADE(g, g_scale);
dst_argb[2] = SHADE(r, r_scale);
@@ -851,9 +898,9 @@ void ARGBMultiplyRow_C(const uint8* src_argb0,
#define SHADE(f, v) clamp255(v + f)
-void ARGBAddRow_C(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -878,9 +925,9 @@ void ARGBAddRow_C(const uint8* src_argb0,
#define SHADE(f, v) clamp0(f - v)
-void ARGBSubtractRow_C(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -904,10 +951,10 @@ void ARGBSubtractRow_C(const uint8* src_argb0,
#undef SHADE
// Sobel functions which mimics SSSE3.
-void SobelXRow_C(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -921,13 +968,13 @@ void SobelXRow_C(const uint8* src_y0,
int b_diff = b - b_sub;
int c_diff = c - c_sub;
int sobel = Abs(a_diff + b_diff * 2 + c_diff);
- dst_sobelx[i] = (uint8)(clamp255(sobel));
+ dst_sobelx[i] = (uint8_t)(clamp255(sobel));
}
}
-void SobelYRow_C(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -941,62 +988,62 @@ void SobelYRow_C(const uint8* src_y0,
int b_diff = b - b_sub;
int c_diff = c - c_sub;
int sobel = Abs(a_diff + b_diff * 2 + c_diff);
- dst_sobely[i] = (uint8)(clamp255(sobel));
+ dst_sobely[i] = (uint8_t)(clamp255(sobel));
}
}
-void SobelRow_C(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
int r = src_sobelx[i];
int b = src_sobely[i];
int s = clamp255(r + b);
- dst_argb[0] = (uint8)(s);
- dst_argb[1] = (uint8)(s);
- dst_argb[2] = (uint8)(s);
- dst_argb[3] = (uint8)(255u);
+ dst_argb[0] = (uint8_t)(s);
+ dst_argb[1] = (uint8_t)(s);
+ dst_argb[2] = (uint8_t)(s);
+ dst_argb[3] = (uint8_t)(255u);
dst_argb += 4;
}
}
-void SobelToPlaneRow_C(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width) {
int i;
for (i = 0; i < width; ++i) {
int r = src_sobelx[i];
int b = src_sobely[i];
int s = clamp255(r + b);
- dst_y[i] = (uint8)(s);
+ dst_y[i] = (uint8_t)(s);
}
}
-void SobelXYRow_C(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
int r = src_sobelx[i];
int b = src_sobely[i];
int g = clamp255(r + b);
- dst_argb[0] = (uint8)(b);
- dst_argb[1] = (uint8)(g);
- dst_argb[2] = (uint8)(r);
- dst_argb[3] = (uint8)(255u);
+ dst_argb[0] = (uint8_t)(b);
+ dst_argb[1] = (uint8_t)(g);
+ dst_argb[2] = (uint8_t)(r);
+ dst_argb[3] = (uint8_t)(255u);
dst_argb += 4;
}
}
-void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
+void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// Copy a Y to RGB.
int x;
for (x = 0; x < width; ++x) {
- uint8 y = src_y[0];
+ uint8_t y = src_y[0];
dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
dst_argb[3] = 255u;
dst_argb += 4;
@@ -1253,12 +1300,14 @@ const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
#undef YG
// C reference code that mimics the YUV assembly.
-static __inline void YuvPixel(uint8 y,
- uint8 u,
- uint8 v,
- uint8* b,
- uint8* g,
- uint8* r,
+// Reads 8 bit YUV and clamps down to 8 bit RGB.
+
+static __inline void YuvPixel(uint8_t y,
+ uint8_t u,
+ uint8_t v,
+ uint8_t* b,
+ uint8_t* g,
+ uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
@@ -1289,19 +1338,63 @@ static __inline void YuvPixel(uint8 y,
int yg = yuvconstants->kYToRgb[0];
#endif
- uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16;
- *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6);
- *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
- *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
+ uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
+ *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
+ *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
+ *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
}
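// Worked illustration of the fixed point layout above (a sketch; YuvPixel
// and kYuvI601Constants are defined in this file): y * 0x0101 widens 8 bit
// luma to 16 bits, the yg multiply and >> 16 leave a value with 6 fraction
// bits, and Clamp(x >> 6) drops them to produce 8 bit RGB.
//   uint8_t b, g, r;
//   YuvPixel(128, 128, 128, &b, &g, &r, &kYuvI601Constants);
//   // u == v == 128 is the chroma zero point, so b, g and r come out
//   // equal: a mid gray.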
-// C reference code that mimics the YUV 10 bit assembly.
-static __inline void YuvPixel10(uint16 y,
- uint16 u,
- uint16 v,
- uint8* b,
- uint8* g,
- uint8* r,
+// Reads 8 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel8_16(uint8_t y,
+ uint8_t u,
+ uint8_t v,
+ int* b,
+ int* g,
+ int* r,
+ const struct YuvConstants* yuvconstants) {
+#if defined(__aarch64__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = -yuvconstants->kUVToRB[1];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#elif defined(__arm__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[4];
+ int vr = -yuvconstants->kUVToRB[4];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#else
+ int ub = yuvconstants->kUVToB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = yuvconstants->kUVToR[1];
+ int bb = yuvconstants->kUVBiasB[0];
+ int bg = yuvconstants->kUVBiasG[0];
+ int br = yuvconstants->kUVBiasR[0];
+ int yg = yuvconstants->kYToRgb[0];
+#endif
+
+ uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
+ *b = (int)(-(u * ub) + y1 + bb);
+ *g = (int)(-(u * ug + v * vg) + y1 + bg);
+ *r = (int)(-(v * vr) + y1 + br);
+}
+
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 10 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel16(int16_t y,
+ int16_t u,
+ int16_t v,
+ int* b,
+ int* g,
+ int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
@@ -1332,12 +1425,30 @@ static __inline void YuvPixel10(uint16 y,
int yg = yuvconstants->kYToRgb[0];
#endif
- uint32 y1 = (uint32)((y << 6) * yg) >> 16;
+ uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
u = clamp255(u >> 2);
v = clamp255(v >> 2);
- *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6);
- *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
- *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
+ *b = (int)(-(u * ub) + y1 + bb);
+ *g = (int)(-(u * ug + v * vg) + y1 + bg);
+ *r = (int)(-(v * vr) + y1 + br);
+}
+
+// C reference code that mimics the YUV 10 bit assembly.
+// Reads 10 bit YUV and clamps down to 8 bit RGB.
+static __inline void YuvPixel10(uint16_t y,
+ uint16_t u,
+ uint16_t v,
+ uint8_t* b,
+ uint8_t* g,
+ uint8_t* r,
+ const struct YuvConstants* yuvconstants) {
+ int b16;
+ int g16;
+ int r16;
+ YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
+ *b = Clamp(b16 >> 6);
+ *g = Clamp(g16 >> 6);
+ *r = Clamp(r16 >> 6);
}
// Y contribution to R,G,B. Scale and bias.
@@ -1345,11 +1456,11 @@ static __inline void YuvPixel10(uint16 y,
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
// C reference code that mimics the YUV assembly.
-static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
- uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
- *b = Clamp((int32)(y1 + YGB) >> 6);
- *g = Clamp((int32)(y1 + YGB) >> 6);
- *r = Clamp((int32)(y1 + YGB) >> 6);
+static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) {
+ uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16;
+ *b = Clamp((int32_t)(y1 + YGB) >> 6);
+ *g = Clamp((int32_t)(y1 + YGB) >> 6);
+ *r = Clamp((int32_t)(y1 + YGB) >> 6);
}
#undef YG
@@ -1359,16 +1470,16 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
(defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
-void I444ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I444ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
- uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
+ uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
+ uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = 255;
@@ -1387,10 +1498,10 @@ void I444ToARGBRow_C(const uint8* src_y,
}
}
#else
-void I444ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I444ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1407,10 +1518,10 @@ void I444ToARGBRow_C(const uint8* src_y,
#endif
// Also used for 420
-void I422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I422ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1434,10 +1545,10 @@ void I422ToARGBRow_C(const uint8* src_y,
}
// 10 bit YUV to ARGB
-void I210ToARGBRow_C(const uint16* src_y,
- const uint16* src_u,
- const uint16* src_v,
- uint8* rgb_buf,
+void I210ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1460,11 +1571,78 @@ void I210ToARGBRow_C(const uint16* src_y,
}
}
-void I422AlphaToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- const uint8* src_a,
- uint8* rgb_buf,
+static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
+ uint32_t ar30;
+ b = b >> 4; // convert 8.6 fixed point rgb to 10 bit.
+ g = g >> 4;
+ r = r >> 4;
+ b = Clamp10(b);
+ g = Clamp10(g);
+ r = Clamp10(r);
+ ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
+ (*(uint32_t*)rgb_buf) = ar30;
+}
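+
+// Worked illustration of the packing above (a sketch): the inputs carry 6
+// fraction bits, so >> 4 keeps a 10 bit value whose top 8 bits align with
+// the 8 bit result, and the word packs as alpha(2):r(10):g(10):b(10).
+// For example, 255 << 6 == 16320 per channel gives 16320 >> 4 == 1020
+// (0x3fc) per channel and a packed word of 0xffcff3fc.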
+
+// 10 bit YUV to 10 bit AR30
+void I210ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf + 4, b, g, r);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ }
+}
+
+// 8 bit YUV to 10 bit AR30
+// Uses the same code as 10 bit YUV, but shifts the 8 bit values up to 10 bits.
+void I422ToAR30Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf + 4, b, g, r);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ }
+}
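+
+// Usage sketch (hypothetical buffers, for illustration):
+//   uint8_t y[2] = {16, 235}, u[1] = {128}, v[1] = {128};
+//   uint8_t ar30[8];  // 2 pixels * 4 bytes each.
+//   I422ToAR30Row_C(y, u, v, ar30, &kYuvI601Constants, 2);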
+
+void I422AlphaToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1488,10 +1666,10 @@ void I422AlphaToARGBRow_C(const uint8* src_y,
}
}
-void I422ToRGB24Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I422ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1511,18 +1689,18 @@ void I422ToRGB24Row_C(const uint8* src_y,
}
}
-void I422ToARGB4444Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
@@ -1533,8 +1711,8 @@ void I422ToARGB4444Row_C(const uint8* src_y,
b1 = b1 >> 4;
g1 = g1 >> 4;
r1 = r1 >> 4;
- *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
- (g1 << 20) | (r1 << 24) | 0xf000f000;
+ *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
+ (g1 << 20) | (r1 << 24) | 0xf000f000;
src_y += 2;
src_u += 1;
src_v += 1;
@@ -1545,22 +1723,22 @@ void I422ToARGB4444Row_C(const uint8* src_y,
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
- *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
+ *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
}
}
-void I422ToARGB1555Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
@@ -1571,8 +1749,8 @@ void I422ToARGB1555Row_C(const uint8* src_y,
b1 = b1 >> 3;
g1 = g1 >> 3;
r1 = r1 >> 3;
- *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
- (g1 << 21) | (r1 << 26) | 0x80008000;
+ *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
+ (g1 << 21) | (r1 << 26) | 0x80008000;
src_y += 2;
src_u += 1;
src_v += 1;
@@ -1583,22 +1761,22 @@ void I422ToARGB1555Row_C(const uint8* src_y,
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
- *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
+ *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
}
}
-void I422ToRGB565Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
@@ -1609,7 +1787,7 @@ void I422ToRGB565Row_C(const uint8* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) =
+ *(uint32_t*)(dst_rgb565) =
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
src_y += 2;
src_u += 1;
@@ -1621,13 +1799,13 @@ void I422ToRGB565Row_C(const uint8* src_y,
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
}
}
-void NV12ToARGBRow_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* rgb_buf,
+void NV12ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1649,9 +1827,9 @@ void NV12ToARGBRow_C(const uint8* src_y,
}
}
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* rgb_buf,
+void NV21ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1673,17 +1851,17 @@ void NV21ToARGBRow_C(const uint8* src_y,
}
}
-void NV12ToRGB565Row_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
+void NV12ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
@@ -1694,7 +1872,7 @@ void NV12ToRGB565Row_C(const uint8* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) =
+ *(uint32_t*)(dst_rgb565) =
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
src_y += 2;
src_uv += 2;
@@ -1705,12 +1883,12 @@ void NV12ToRGB565Row_C(const uint8* src_y,
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
}
}
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
- uint8* rgb_buf,
+void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1731,8 +1909,8 @@ void YUY2ToARGBRow_C(const uint8* src_yuy2,
}
}
-void UYVYToARGBRow_C(const uint8* src_uyvy,
- uint8* rgb_buf,
+void UYVYToARGBRow_C(const uint8_t* src_uyvy,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1753,10 +1931,10 @@ void UYVYToARGBRow_C(const uint8* src_uyvy,
}
}
-void I422ToRGBARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I422ToRGBARow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -1779,7 +1957,7 @@ void I422ToRGBARow_C(const uint8* src_y,
}
}
-void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
+void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
@@ -1795,7 +1973,7 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
}
}
-void MirrorRow_C(const uint8* src, uint8* dst, int width) {
+void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
int x;
src += width - 1;
for (x = 0; x < width - 1; x += 2) {
@@ -1808,7 +1986,10 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) {
}
}
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
+void MirrorUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
int x;
src_uv += (width - 1) << 1;
for (x = 0; x < width - 1; x += 2) {
@@ -1824,10 +2005,10 @@ void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
}
}
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
+void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
int x;
- const uint32* src32 = (const uint32*)(src);
- uint32* dst32 = (uint32*)(dst);
+ const uint32_t* src32 = (const uint32_t*)(src);
+ uint32_t* dst32 = (uint32_t*)(dst);
src32 += width - 1;
for (x = 0; x < width - 1; x += 2) {
dst32[x] = src32[0];
@@ -1839,7 +2020,10 @@ void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
}
}
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
+void SplitUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
dst_u[x] = src_uv[0];
@@ -1854,9 +2038,9 @@ void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
}
}
-void MergeUVRow_C(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_C(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -1872,10 +2056,10 @@ void MergeUVRow_C(const uint8* src_u,
}
}
-void SplitRGBRow_C(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_C(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -1886,10 +2070,10 @@ void SplitRGBRow_C(const uint8* src_rgb,
}
}
-void MergeRGBRow_C(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_C(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -1905,9 +2089,9 @@ void MergeRGBRow_C(const uint8* src_r,
// 64 = 10 bits
// 16 = 12 bits
// 1 = 16 bits
-void MergeUVRow_16_C(const uint16* src_u,
- const uint16* src_v,
- uint16* dst_uv,
+void MergeUVRow_16_C(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
int scale,
int width) {
int x;
@@ -1924,8 +2108,8 @@ void MergeUVRow_16_C(const uint16* src_u,
}
}
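// Worked example of the scale factors above: a 10 bit sample occupies the
// low 10 of 16 bits, and multiplying by 64 (1 << 6) moves it to the high
// bits: 1023 * 64 == 65472 == 0xffc0, i.e. msb aligned.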
-void MultiplyRow_16_C(const uint16* src_y,
- uint16* dst_y,
+void MultiplyRow_16_C(const uint16_t* src_y,
+ uint16_t* dst_y,
int scale,
int width) {
int x;
@@ -1939,8 +2123,8 @@ void MultiplyRow_16_C(const uint16* src_y,
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
-void Convert16To8Row_C(const uint16* src_y,
- uint8* dst_y,
+void Convert16To8Row_C(const uint16_t* src_y,
+ uint8_t* dst_y,
int scale,
int width) {
int x;
@@ -1951,8 +2135,8 @@ void Convert16To8Row_C(const uint16* src_y,
// Use scale to convert lsb formats to msb, depending on how many bits there are:
// 1024 = 10 bits
-void Convert8To16Row_C(const uint8* src_y,
- uint16* dst_y,
+void Convert8To16Row_C(const uint8_t* src_y,
+ uint16_t* dst_y,
int scale,
int width) {
int x;
@@ -1962,20 +2146,20 @@ void Convert8To16Row_C(const uint8* src_y,
}
}
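// Worked example of the two conversions above, assuming the usual
// multiply-and-shift-by-16 form of these loop bodies (elided by the hunks):
// Convert16To8 with scale 16384 maps 10 bit 1023 to
// (1023 * 16384) >> 16 == 255, and Convert8To16 with scale 1024 maps 8 bit
// 255 back up to 1023 once the byte is replicated to 16 bits
// (255 * 0x0101 == 0xffff).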
-void CopyRow_C(const uint8* src, uint8* dst, int count) {
+void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
memcpy(dst, src, count);
}
-void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
+void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
memcpy(dst, src, count * 2);
}
-void SetRow_C(uint8* dst, uint8 v8, int width) {
+void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
memset(dst, v8, width);
}
-void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
- uint32* d = (uint32*)(dst_argb);
+void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
+ uint32_t* d = (uint32_t*)(dst_argb);
int x;
for (x = 0; x < width; ++x) {
d[x] = v32;
@@ -1983,10 +2167,10 @@ void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
}
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
-void YUY2ToUVRow_C(const uint8* src_yuy2,
+void YUY2ToUVRow_C(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
// Output a row of UV values, filtering 2 rows of YUY2.
int x;
@@ -2000,9 +2184,9 @@ void YUY2ToUVRow_C(const uint8* src_yuy2,
}
// Copy row of YUY2 UV's (422) into U and V (422).
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
// Output a row of UV values.
int x;
@@ -2016,7 +2200,7 @@ void YUY2ToUV422Row_C(const uint8* src_yuy2,
}
// Copy row of YUY2 Y's (422) into Y (420/422).
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
+void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
// Output a row of Y values.
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -2030,10 +2214,10 @@ void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
}
// Filter 2 rows of UYVY UV's (422) into U and V (420).
-void UYVYToUVRow_C(const uint8* src_uyvy,
+void UYVYToUVRow_C(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
// Output a row of UV values.
int x;
@@ -2047,9 +2231,9 @@ void UYVYToUVRow_C(const uint8* src_uyvy,
}
// Copy row of UYVY UV's (422) into U and V (422).
-void UYVYToUV422Row_C(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_C(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
// Output a row of UV values.
int x;
@@ -2063,7 +2247,7 @@ void UYVYToUV422Row_C(const uint8* src_uyvy,
}
// Copy row of UYVY Y's (422) into Y (420/422).
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
+void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
// Output a row of Y values.
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -2081,19 +2265,19 @@ void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_C(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- uint32 fb = src_argb0[0];
- uint32 fg = src_argb0[1];
- uint32 fr = src_argb0[2];
- uint32 a = src_argb0[3];
- uint32 bb = src_argb1[0];
- uint32 bg = src_argb1[1];
- uint32 br = src_argb1[2];
+ uint32_t fb = src_argb0[0];
+ uint32_t fg = src_argb0[1];
+ uint32_t fr = src_argb0[2];
+ uint32_t a = src_argb0[3];
+ uint32_t bb = src_argb1[0];
+ uint32_t bg = src_argb1[1];
+ uint32_t br = src_argb1[2];
dst_argb[0] = BLEND(fb, bb, a);
dst_argb[1] = BLEND(fg, bg, a);
dst_argb[2] = BLEND(fr, br, a);
@@ -2116,13 +2300,13 @@ void ARGBBlendRow_C(const uint8* src_argb0,
}
if (width & 1) {
- uint32 fb = src_argb0[0];
- uint32 fg = src_argb0[1];
- uint32 fr = src_argb0[2];
- uint32 a = src_argb0[3];
- uint32 bb = src_argb1[0];
- uint32 bg = src_argb1[1];
- uint32 br = src_argb1[2];
+ uint32_t fb = src_argb0[0];
+ uint32_t fg = src_argb0[1];
+ uint32_t fr = src_argb0[2];
+ uint32_t a = src_argb0[3];
+ uint32_t bb = src_argb1[0];
+ uint32_t bg = src_argb1[1];
+ uint32_t br = src_argb1[2];
dst_argb[0] = BLEND(fb, bb, a);
dst_argb[1] = BLEND(fg, bg, a);
dst_argb[2] = BLEND(fr, br, a);
@@ -2132,10 +2316,10 @@ void ARGBBlendRow_C(const uint8* src_argb0,
#undef BLEND
#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
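// Worked example of UBLEND: with foreground 200, background 100 and alpha
// 128, (128 * 200 + 127 * 100 + 255) >> 8 == 38555 >> 8 == 150; the + 255
// biases the truncation upward.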
-void BlendPlaneRow_C(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+void BlendPlaneRow_C(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -2156,13 +2340,13 @@ void BlendPlaneRow_C(const uint8* src0,
// Multiply source RGB by alpha and store to destination.
// This code mimics the SSSE3 version for better testability.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
int i;
for (i = 0; i < width - 1; i += 2) {
- uint32 b = src_argb[0];
- uint32 g = src_argb[1];
- uint32 r = src_argb[2];
- uint32 a = src_argb[3];
+ uint32_t b = src_argb[0];
+ uint32_t g = src_argb[1];
+ uint32_t r = src_argb[2];
+ uint32_t a = src_argb[3];
dst_argb[0] = ATTENUATE(b, a);
dst_argb[1] = ATTENUATE(g, a);
dst_argb[2] = ATTENUATE(r, a);
@@ -2180,10 +2364,10 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
}
if (width & 1) {
- const uint32 b = src_argb[0];
- const uint32 g = src_argb[1];
- const uint32 r = src_argb[2];
- const uint32 a = src_argb[3];
+ const uint32_t b = src_argb[0];
+ const uint32_t g = src_argb[1];
+ const uint32_t r = src_argb[2];
+ const uint32_t a = src_argb[3];
dst_argb[0] = ATTENUATE(b, a);
dst_argb[1] = ATTENUATE(g, a);
dst_argb[2] = ATTENUATE(r, a);
@@ -2199,7 +2383,7 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) 0x01000000 + (0x10000 / a)
-const uint32 fixed_invtbl8[256] = {
+const uint32_t fixed_invtbl8[256] = {
0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
@@ -2239,14 +2423,16 @@ const uint32 fixed_invtbl8[256] = {
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
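// Worked example of the table: each entry holds 1.0 (0x0100) in the upper
// 16 bits and 256/a in 8.8 fixed point in the lower 16. For a == 128 the
// entry is 0x01000200, so ia == 0x200 (2.0), and an attenuated channel of
// 75 unattenuates to (75 * 0x200) >> 8 == 150.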
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBUnattenuateRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
int i;
for (i = 0; i < width; ++i) {
- uint32 b = src_argb[0];
- uint32 g = src_argb[1];
- uint32 r = src_argb[2];
- const uint32 a = src_argb[3];
- const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
+ uint32_t b = src_argb[0];
+ uint32_t g = src_argb[1];
+ uint32_t r = src_argb[2];
+ const uint32_t a = src_argb[3];
+ const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
b = (b * ia) >> 8;
g = (g * ia) >> 8;
r = (r * ia) >> 8;
@@ -2260,11 +2446,11 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
}
}
-void ComputeCumulativeSumRow_C(const uint8* row,
- int32* cumsum,
- const int32* previous_cumsum,
+void ComputeCumulativeSumRow_C(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
int width) {
- int32 row_sum[4] = {0, 0, 0, 0};
+ int32_t row_sum[4] = {0, 0, 0, 0};
int x;
for (x = 0; x < width; ++x) {
row_sum[0] += row[x * 4 + 0];
@@ -2278,19 +2464,19 @@ void ComputeCumulativeSumRow_C(const uint8* row,
}
}
-void CumulativeSumToAverageRow_C(const int32* tl,
- const int32* bl,
+void CumulativeSumToAverageRow_C(const int32_t* tl,
+ const int32_t* bl,
int w,
int area,
- uint8* dst,
+ uint8_t* dst,
int count) {
float ooa = 1.0f / area;
int i;
for (i = 0; i < count; ++i) {
- dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
- dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
- dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
- dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
+ dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
+ dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
+ dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
+ dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
dst += 4;
tl += 4;
bl += 4;
@@ -2299,9 +2485,9 @@ void CumulativeSumToAverageRow_C(const int32* tl,
// Copy pixels from rotated source to destination row with a slope.
LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb,
+void ARGBAffineRow_C(const uint8_t* src_argb,
int src_argb_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
const float* uv_dudv,
int width) {
int i;
@@ -2312,8 +2498,8 @@ void ARGBAffineRow_C(const uint8* src_argb,
for (i = 0; i < width; ++i) {
int x = (int)(uv[0]);
int y = (int)(uv[1]);
- *(uint32*)(dst_argb) =
- *(const uint32*)(src_argb + y * src_argb_stride + x * 4);
+ *(uint32_t*)(dst_argb) =
+ *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
dst_argb += 4;
uv[0] += uv_dudv[2];
uv[1] += uv_dudv[3];
@@ -2321,9 +2507,9 @@ void ARGBAffineRow_C(const uint8* src_argb,
}
// Blend 2 rows into 1.
-static void HalfRow_C(const uint8* src_uv,
+static void HalfRow_C(const uint8_t* src_uv,
ptrdiff_t src_uv_stride,
- uint8* dst_uv,
+ uint8_t* dst_uv,
int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -2331,9 +2517,9 @@ static void HalfRow_C(const uint8* src_uv,
}
}
-static void HalfRow_16_C(const uint16* src_uv,
+static void HalfRow_16_C(const uint16_t* src_uv,
ptrdiff_t src_uv_stride,
- uint16* dst_uv,
+ uint16_t* dst_uv,
int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -2342,14 +2528,14 @@ static void HalfRow_16_C(const uint16* src_uv,
}
// C version 2x2 -> 2x1.
-void InterpolateRow_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction) {
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
- const uint8* src_ptr1 = src_ptr + src_stride;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
int x;
if (y1_fraction == 0) {
memcpy(dst_ptr, src_ptr, width);
@@ -2374,14 +2560,14 @@ void InterpolateRow_C(uint8* dst_ptr,
}
}
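// Worked example: source_y_fraction == 64 gives y0_fraction == 192, a
// 75/25 blend of the two rows; 0 copies the first row unchanged (the
// memcpy fast path above) and 128 is an even average.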
-void InterpolateRow_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void InterpolateRow_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction) {
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
- const uint16* src_ptr1 = src_ptr + src_stride;
+ const uint16_t* src_ptr1 = src_ptr + src_stride;
int x;
if (source_y_fraction == 0) {
memcpy(dst_ptr, src_ptr, width * 2);
@@ -2404,9 +2590,9 @@ void InterpolateRow_16_C(uint16* dst_ptr,
}
// Use first 4 shuffler values to reorder ARGB channels.
-void ARGBShuffleRow_C(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
int index0 = shuffler[0];
int index1 = shuffler[1];
@@ -2416,10 +2602,10 @@ void ARGBShuffleRow_C(const uint8* src_argb,
int x;
for (x = 0; x < width; ++x) {
// To support in-place conversion.
- uint8 b = src_argb[index0];
- uint8 g = src_argb[index1];
- uint8 r = src_argb[index2];
- uint8 a = src_argb[index3];
+ uint8_t b = src_argb[index0];
+ uint8_t g = src_argb[index1];
+ uint8_t r = src_argb[index2];
+ uint8_t a = src_argb[index3];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
@@ -2429,10 +2615,10 @@ void ARGBShuffleRow_C(const uint8* src_argb,
}
}
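// Example shuffler (kShuffleSwapRB is a hypothetical name): with ARGB laid
// out in memory as b,g,r,a, the indices {2, 1, 0, 3} read r into the b
// slot and b into the r slot, swapping the red and blue channels:
//   static const uint8_t kShuffleSwapRB[4] = {2, 1, 0, 3};
//   ARGBShuffleRow_C(row, row, kShuffleSwapRB, width);  // in-place is OK.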
-void I422ToYUY2Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame,
+void I422ToYUY2Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -2453,10 +2639,10 @@ void I422ToYUY2Row_C(const uint8* src_y,
}
}
-void I422ToUYVYRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame,
+void I422ToUYVYRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -2477,8 +2663,8 @@ void I422ToUYVYRow_C(const uint8* src_y,
}
}
-void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBPolynomialRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
int width) {
int i;
@@ -2508,10 +2694,10 @@ void ARGBPolynomialRow_C(const uint8* src_argb,
dr += poly[14] * r3;
da += poly[15] * a3;
- dst_argb[0] = Clamp((int32)(db));
- dst_argb[1] = Clamp((int32)(dg));
- dst_argb[2] = Clamp((int32)(dr));
- dst_argb[3] = Clamp((int32)(da));
+ dst_argb[0] = Clamp((int32_t)(db));
+ dst_argb[1] = Clamp((int32_t)(dg));
+ dst_argb[2] = Clamp((int32_t)(dr));
+ dst_argb[3] = Clamp((int32_t)(da));
src_argb += 4;
dst_argb += 4;
}
@@ -2527,31 +2713,34 @@ void ARGBPolynomialRow_C(const uint8* src_argb,
// simply extract the low bits of the exponent and the high
// bits of the mantissa from our float and we're done.
-void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width) {
+void HalfFloatRow_C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
int i;
float mult = 1.9259299444e-34f * scale;
for (i = 0; i < width; ++i) {
float value = src[i] * mult;
- dst[i] = (uint16)((*(uint32_t*)&value) >> 13);
+ dst[i] = (uint16_t)((*(uint32_t*)&value) >> 13);
}
}
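// Worked example of the trick above: 1.9259299444e-34f is 2^-112, which
// rebiases the float exponent (bias 127) down to the half exponent
// (bias 15); >> 13 then drops 13 of the 23 mantissa bits, leaving
// 1:5:10 sign:exponent:mantissa. With scale == 1.0f, a src value of 1
// becomes 1.0f * 2^-112 == bits 0x07800000, and 0x07800000 >> 13 ==
// 0x3c00, the half precision 1.0.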
-void ARGBLumaColorTableRow_C(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- const uint8* luma,
- uint32 lumacoeff) {
- uint32 bc = lumacoeff & 0xff;
- uint32 gc = (lumacoeff >> 8) & 0xff;
- uint32 rc = (lumacoeff >> 16) & 0xff;
+ const uint8_t* luma,
+ uint32_t lumacoeff) {
+ uint32_t bc = lumacoeff & 0xff;
+ uint32_t gc = (lumacoeff >> 8) & 0xff;
+ uint32_t rc = (lumacoeff >> 16) & 0xff;
int i;
for (i = 0; i < width - 1; i += 2) {
// Luminance in rows, color values in columns.
- const uint8* luma0 =
+ const uint8_t* luma0 =
((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
luma;
- const uint8* luma1;
+ const uint8_t* luma1;
dst_argb[0] = luma0[src_argb[0]];
dst_argb[1] = luma0[src_argb[1]];
dst_argb[2] = luma0[src_argb[2]];
@@ -2568,7 +2757,7 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb,
}
if (width & 1) {
// Luminance in rows, color values in columns.
- const uint8* luma0 =
+ const uint8_t* luma0 =
((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
luma;
dst_argb[0] = luma0[src_argb[0]];
@@ -2578,7 +2767,7 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb,
}
}
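// Illustration of the indexing above: luma points at a table of 128 rows
// of 256 bytes. (b*bc + g*gc + r*rc) & 0x7F00u keeps the high byte of the
// weighted luma as a multiple of 256, so luma0 is the start of the row for
// that brightness, which is then indexed by each channel's value.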
-void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
+void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
int i;
for (i = 0; i < width - 1; i += 2) {
dst[3] = src[3];
@@ -2591,7 +2780,7 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
}
}
-void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) {
+void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
int i;
for (i = 0; i < width - 1; i += 2) {
dst_a[0] = src_argb[3];
@@ -2604,7 +2793,7 @@ void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) {
}
}
-void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
+void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
int i;
for (i = 0; i < width - 1; i += 2) {
dst[3] = src[0];
@@ -2623,13 +2812,13 @@ void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has an asm version, but GCC uses a 2 step wrapper.
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
@@ -2644,14 +2833,14 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y,
#endif
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
@@ -2666,14 +2855,14 @@ void I422ToARGB1555Row_SSSE3(const uint8* src_y,
#endif
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
@@ -2688,13 +2877,13 @@ void I422ToARGB4444Row_SSSE3(const uint8* src_y,
#endif
#if defined(HAS_NV12TORGB565ROW_SSSE3)
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
+void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
@@ -2708,13 +2897,13 @@ void NV12ToRGB565Row_SSSE3(const uint8* src_y,
#endif
#if defined(HAS_I422TORGB565ROW_AVX2)
-void I422ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
@@ -2733,14 +2922,14 @@ void I422ToRGB565Row_AVX2(const uint8* src_y,
#endif
#if defined(HAS_I422TOARGB1555ROW_AVX2)
-void I422ToARGB1555Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
@@ -2759,14 +2948,14 @@ void I422ToARGB1555Row_AVX2(const uint8* src_y,
#endif
#if defined(HAS_I422TOARGB4444ROW_AVX2)
-void I422ToARGB4444Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
@@ -2785,14 +2974,14 @@ void I422ToARGB4444Row_AVX2(const uint8* src_y,
#endif
#if defined(HAS_I422TORGB24ROW_AVX2)
-void I422ToRGB24Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
@@ -2808,13 +2997,13 @@ void I422ToRGB24Row_AVX2(const uint8* src_y,
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
-void NV12ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
+void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
// Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
@@ -2864,7 +3053,7 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
}
}
-void GaussRow_C(const uint32* src, uint16* dst, int width) {
+void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
int i;
for (i = 0; i < width; ++i) {
*dst++ =
@@ -2874,12 +3063,12 @@ void GaussRow_C(const uint32* src, uint16* dst, int width) {
}
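// Worked example: GaussRow and GaussCol together apply the separable 5 tap
// kernel [1, 4, 6, 4, 1] in each direction. The taps sum to 16, so the 2D
// weights sum to 256 and a flat region of value v accumulates 256 * v
// across the two passes; dividing by 256 restores the original range.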
// Filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
-void GaussCol_C(const uint16* src0,
- const uint16* src1,
- const uint16* src2,
- const uint16* src3,
- const uint16* src4,
- uint32* dst,
+void GaussCol_C(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
int width) {
int i;
for (i = 0; i < width; ++i) {
diff --git a/chromium/third_party/libyuv/source/row_gcc.cc b/chromium/third_party/libyuv/source/row_gcc.cc
index 0dc126678e0..95845c2592f 100644
--- a/chromium/third_party/libyuv/source/row_gcc.cc
+++ b/chromium/third_party/libyuv/source/row_gcc.cc
@@ -152,392 +152,399 @@ static const lvec8 kShuffleNV21 = {
#endif // HAS_RGB24TOARGBROW_SSSE3
#ifdef HAS_J400TOARGBROW_SSE2
-void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n"
- "punpckhwd %%xmm1,%%xmm1 \n"
- "por %%xmm5,%%xmm0 \n"
- "por %%xmm5,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm5"
- );
+void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0x18,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm0,%%xmm0 \n"
+ "punpckhwd %%xmm1,%%xmm1 \n"
+ "por %%xmm5,%%xmm0 \n"
+ "por %%xmm5,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_J400TOARGBROW_SSE2
#ifdef HAS_RGB24TOARGBROW_SSSE3
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
- "pslld $0x18,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x30,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm2 \n"
- "palignr $0x8,%%xmm1,%%xmm2 \n"
- "pshufb %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm2 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "por %%xmm5,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "palignr $0x4,%%xmm3,%%xmm3 \n"
- "pshufb %%xmm4,%%xmm3 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleMaskRGB24ToARGB) // %3
- : "memory", "cc" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000
+ "pslld $0x18,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm3 \n"
+ "lea 0x30(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+ "palignr $0x8,%%xmm1,%%xmm2 \n"
+ "pshufb %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm2 \n"
+ "palignr $0xc,%%xmm0,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "por %%xmm5,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "palignr $0x4,%%xmm3,%%xmm3 \n"
+ "pshufb %%xmm4,%%xmm3 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm3,0x30(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskRGB24ToARGB) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
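
The RGB24ToARGBRow_SSSE3 kernel above unpacks 16 packed 3-byte pixels (48 bytes) into 16 4-byte pixels per iteration, using palignr to realign the 3-byte stride and pshufb (kShuffleMaskRGB24ToARGB) to open the alpha slot. A scalar sketch of the same transform (illustrative name):

#include <stdint.h>

// RGB24 (B,G,R in memory) to ARGB (B,G,R,A): copy three bytes, append
// an opaque alpha byte.
static void RGB24ToARGBRow_sketch(const uint8_t* src_rgb24,
                                  uint8_t* dst_argb, int width) {
  for (int x = 0; x < width; ++x) {
    dst_argb[4 * x + 0] = src_rgb24[3 * x + 0];  // B
    dst_argb[4 * x + 1] = src_rgb24[3 * x + 1];  // G
    dst_argb[4 * x + 2] = src_rgb24[3 * x + 2];  // R
    dst_argb[4 * x + 3] = 0xff;                  // A
  }
}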
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
- "pslld $0x18,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x30,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm2 \n"
- "palignr $0x8,%%xmm1,%%xmm2 \n"
- "pshufb %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm2 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "por %%xmm5,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "palignr $0x4,%%xmm3,%%xmm3 \n"
- "pshufb %%xmm4,%%xmm3 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleMaskRAWToARGB) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000
+ "pslld $0x18,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm3 \n"
+ "lea 0x30(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+ "palignr $0x8,%%xmm1,%%xmm2 \n"
+ "pshufb %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm2 \n"
+ "palignr $0xc,%%xmm0,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "por %%xmm5,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "palignr $0x4,%%xmm3,%%xmm3 \n"
+ "pshufb %%xmm4,%%xmm3 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm3,0x30(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskRAWToARGB) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
-void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width) {
- asm volatile (
- "movdqa %3,%%xmm3 \n"
- "movdqa %4,%%xmm4 \n"
- "movdqa %5,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x4,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x8,0) ",%%xmm2 \n"
- "lea " MEMLEA(0x18,0) ",%0 \n"
- "pshufb %%xmm3,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "movq %%xmm1," MEMACCESS2(0x8,1) " \n"
- "movq %%xmm2," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_rgb24), // %1
- "+r"(width) // %2
- : "m"(kShuffleMaskRAWToRGB24_0), // %3
- "m"(kShuffleMaskRAWToRGB24_1), // %4
- "m"(kShuffleMaskRAWToRGB24_2) // %5
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+void RAWToRGB24Row_SSSE3(const uint8_t* src_raw,
+ uint8_t* dst_rgb24,
+ int width) {
+ asm volatile(
+ "movdqa %3,%%xmm3 \n"
+ "movdqa %4,%%xmm4 \n"
+ "movdqa %5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x4(%0),%%xmm1 \n"
+ "movdqu 0x8(%0),%%xmm2 \n"
+ "lea 0x18(%0),%0 \n"
+ "pshufb %%xmm3,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x8(%1) \n"
+ "movq %%xmm2,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskRAWToRGB24_0), // %3
+ "m"(kShuffleMaskRAWToRGB24_1), // %4
+ "m"(kShuffleMaskRAWToRGB24_2) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
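
RAW in libyuv is R,G,B byte order, so RAWToRGB24 reduces to a per-pixel swap of the first and third bytes; the three pshufb masks above do this for 8 pixels per iteration. Scalar sketch (illustrative name):

#include <stdint.h>

// RAW (R,G,B) to RGB24 (B,G,R): swap bytes 0 and 2 of each pixel.
static void RAWToRGB24Row_sketch(const uint8_t* src_raw, uint8_t* dst_rgb24,
                                 int width) {
  for (int x = 0; x < width; ++x) {
    dst_rgb24[3 * x + 0] = src_raw[3 * x + 2];  // B
    dst_rgb24[3 * x + 1] = src_raw[3 * x + 1];  // G
    dst_rgb24[3 * x + 2] = src_raw[3 * x + 0];  // R
  }
}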
-void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "mov $0x1080108,%%eax \n"
- "movd %%eax,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "mov $0x20802080,%%eax \n"
- "movd %%eax,%%xmm6 \n"
- "pshufd $0x0,%%xmm6,%%xmm6 \n"
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psllw $0xb,%%xmm3 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0xa,%%xmm4 \n"
- "psrlw $0x5,%%xmm4 \n"
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psllw $0x8,%%xmm7 \n"
- "sub %0,%1 \n"
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm3,%%xmm1 \n"
- "psllw $0xb,%%xmm2 \n"
- "pmulhuw %%xmm5,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "psllw $0x8,%%xmm1 \n"
- "por %%xmm2,%%xmm1 \n"
- "pand %%xmm4,%%xmm0 \n"
- "pmulhuw %%xmm6,%%xmm0 \n"
- "por %%xmm7,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpckhbw %%xmm0,%%xmm2 \n"
- MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "mov $0x1080108,%%eax \n"
+ "movd %%eax,%%xmm5 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "mov $0x20802080,%%eax \n"
+ "movd %%eax,%%xmm6 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psllw $0xb,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psllw $0xa,%%xmm4 \n"
+ "psrlw $0x5,%%xmm4 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psllw $0x8,%%xmm7 \n"
+ "sub %0,%1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "psllw $0xb,%%xmm2 \n"
+ "pmulhuw %%xmm5,%%xmm1 \n"
+ "pmulhuw %%xmm5,%%xmm2 \n"
+ "psllw $0x8,%%xmm1 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "pmulhuw %%xmm6,%%xmm0 \n"
+ "por %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm0,%%xmm1 \n"
+ "punpckhbw %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm1,0x00(%1,%0,2) \n"
+ "movdqu %%xmm2,0x10(%1,%0,2) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
+ "xmm6", "xmm7");
}
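
The magic constants loaded into xmm5/xmm6 above implement bit replication via pmulhuw, which returns the high 16 bits of the product: a 5-bit field multiplied by 0x0108 becomes (v << 3) | (v >> 2), and the 6-bit green field multiplied by 0x2080 becomes (v << 2) | (v >> 4). A scalar sketch of the per-pixel result (illustrative name):

#include <stdint.h>

// RGB565 -> ARGB: widen each field by replicating its top bits into
// the vacated low bits, matching the pmulhuw trick above.
static void RGB565ToARGBRow_sketch(const uint8_t* src, uint8_t* dst_argb,
                                   int width) {
  for (int x = 0; x < width; ++x) {
    uint16_t p = (uint16_t)(src[2 * x] | (src[2 * x + 1] << 8));
    uint8_t b5 = p & 0x1f, g6 = (p >> 5) & 0x3f, r5 = (uint8_t)(p >> 11);
    dst_argb[4 * x + 0] = (uint8_t)((b5 << 3) | (b5 >> 2));  // B
    dst_argb[4 * x + 1] = (uint8_t)((g6 << 2) | (g6 >> 4));  // G
    dst_argb[4 * x + 2] = (uint8_t)((r5 << 3) | (r5 >> 2));  // R
    dst_argb[4 * x + 3] = 0xff;                              // A
  }
}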
-void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "mov $0x1080108,%%eax \n"
- "movd %%eax,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "mov $0x42004200,%%eax \n"
- "movd %%eax,%%xmm6 \n"
- "pshufd $0x0,%%xmm6,%%xmm6 \n"
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psllw $0xb,%%xmm3 \n"
- "movdqa %%xmm3,%%xmm4 \n"
- "psrlw $0x6,%%xmm4 \n"
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psllw $0x8,%%xmm7 \n"
- "sub %0,%1 \n"
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psllw $0x1,%%xmm1 \n"
- "psllw $0xb,%%xmm2 \n"
- "pand %%xmm3,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "pmulhuw %%xmm5,%%xmm1 \n"
- "psllw $0x8,%%xmm1 \n"
- "por %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm4,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "pmulhuw %%xmm6,%%xmm0 \n"
- "pand %%xmm7,%%xmm2 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpckhbw %%xmm0,%%xmm2 \n"
- MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "mov $0x1080108,%%eax \n"
+ "movd %%eax,%%xmm5 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "mov $0x42004200,%%eax \n"
+ "movd %%eax,%%xmm6 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psllw $0xb,%%xmm3 \n"
+ "movdqa %%xmm3,%%xmm4 \n"
+ "psrlw $0x6,%%xmm4 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psllw $0x8,%%xmm7 \n"
+ "sub %0,%1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "psllw $0x1,%%xmm1 \n"
+ "psllw $0xb,%%xmm2 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "pmulhuw %%xmm5,%%xmm2 \n"
+ "pmulhuw %%xmm5,%%xmm1 \n"
+ "psllw $0x8,%%xmm1 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "psraw $0x8,%%xmm2 \n"
+ "pmulhuw %%xmm6,%%xmm0 \n"
+ "pand %%xmm7,%%xmm2 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm0,%%xmm1 \n"
+ "punpckhbw %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm1,0x00(%1,%0,2) \n"
+ "movdqu %%xmm2,0x10(%1,%0,2) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
+ "xmm6", "xmm7");
}
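
ARGB1555ToARGBRow_SSE2 uses the same replication trick for its three 5-bit channels, while the psraw $0x8 / pand xmm7 pair broadcasts the single alpha bit into a full 0x00 or 0xff byte. Scalar sketch of one pixel (hypothetical helper):

#include <stdint.h>

// ARGB1555 -> ARGB8888: widen 5-bit channels by bit replication and
// expand the 1-bit alpha to 0x00 or 0xff.
static uint32_t ARGB1555ToARGB_sketch(uint16_t p) {
  uint8_t b5 = p & 0x1f, g5 = (p >> 5) & 0x1f, r5 = (p >> 10) & 0x1f;
  uint8_t a = (p & 0x8000) ? 0xff : 0x00;
  uint8_t b = (uint8_t)((b5 << 3) | (b5 >> 2));
  uint8_t g = (uint8_t)((g5 << 3) | (g5 >> 2));
  uint8_t r = (uint8_t)((r5 << 3) | (r5 >> 2));
  return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
}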
-void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "mov $0xf0f0f0f,%%eax \n"
- "movd %%eax,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "pslld $0x4,%%xmm5 \n"
- "sub %0,%1 \n"
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm4,%%xmm0 \n"
- "pand %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "psllw $0x4,%%xmm1 \n"
- "psrlw $0x4,%%xmm3 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm3,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2)
- MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "mov $0xf0f0f0f,%%eax \n"
+ "movd %%eax,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "pslld $0x4,%%xmm5 \n"
+ "sub %0,%1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "pand %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "psllw $0x4,%%xmm1 \n"
+ "psrlw $0x4,%%xmm3 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "movdqu %%xmm0,0x00(%1,%0,2) \n"
+ "movdqu %%xmm1,0x10(%1,%0,2) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
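
For 4444 the expansion is exact multiplication by 17: each 4-bit field v becomes (v << 4) | v, which the kernel achieves with the two nibble masks plus psllw/psrlw and por. Scalar sketch (hypothetical helper):

#include <stdint.h>

// ARGB4444 -> ARGB8888: each nibble expands as v * 17 == (v << 4) | v.
static uint32_t ARGB4444ToARGB_sketch(uint16_t p) {
  uint8_t b = (uint8_t)(((p >> 0) & 0xf) * 0x11);
  uint8_t g = (uint8_t)(((p >> 4) & 0xf) * 0x11);
  uint8_t r = (uint8_t)(((p >> 8) & 0xf) * 0x11);
  uint8_t a = (uint8_t)(((p >> 12) & 0xf) * 0x11);
  return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
}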
-void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "movdqa %3,%%xmm6 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "pshufb %%xmm6,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "pshufb %%xmm6,%%xmm2 \n"
- "pshufb %%xmm6,%%xmm3 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "psrldq $0x4,%%xmm1 \n"
- "pslldq $0xc,%%xmm4 \n"
- "movdqa %%xmm2,%%xmm5 \n"
- "por %%xmm4,%%xmm0 \n"
- "pslldq $0x8,%%xmm5 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "psrldq $0x8,%%xmm2 \n"
- "pslldq $0x4,%%xmm3 \n"
- "por %%xmm3,%%xmm2 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x30,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "m"(kShuffleMaskARGBToRGB24) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+
+ "movdqa %3,%%xmm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "pshufb %%xmm6,%%xmm0 \n"
+ "pshufb %%xmm6,%%xmm1 \n"
+ "pshufb %%xmm6,%%xmm2 \n"
+ "pshufb %%xmm6,%%xmm3 \n"
+ "movdqa %%xmm1,%%xmm4 \n"
+ "psrldq $0x4,%%xmm1 \n"
+ "pslldq $0xc,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm5 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pslldq $0x8,%%xmm5 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "psrldq $0x8,%%xmm2 \n"
+ "pslldq $0x4,%%xmm3 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "lea 0x30(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskARGBToRGB24) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
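
Going the other way, ARGBToRGB24Row_SSSE3 drops the alpha byte: pshufb (kShuffleMaskARGBToRGB24) compacts each 16-byte register to 12 payload bytes, and the pslldq/psrldq/por sequence closes the gaps so 64 input bytes leave as 48. Scalar sketch (illustrative name):

#include <stdint.h>

// ARGB -> RGB24: keep B, G, R and drop A.
static void ARGBToRGB24Row_sketch(const uint8_t* src_argb, uint8_t* dst,
                                  int width) {
  for (int x = 0; x < width; ++x) {
    dst[3 * x + 0] = src_argb[4 * x + 0];  // B
    dst[3 * x + 1] = src_argb[4 * x + 1];  // G
    dst[3 * x + 2] = src_argb[4 * x + 2];  // R
  }
}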
-void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "movdqa %3,%%xmm6 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "pshufb %%xmm6,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "pshufb %%xmm6,%%xmm2 \n"
- "pshufb %%xmm6,%%xmm3 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "psrldq $0x4,%%xmm1 \n"
- "pslldq $0xc,%%xmm4 \n"
- "movdqa %%xmm2,%%xmm5 \n"
- "por %%xmm4,%%xmm0 \n"
- "pslldq $0x8,%%xmm5 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "psrldq $0x8,%%xmm2 \n"
- "pslldq $0x4,%%xmm3 \n"
- "por %%xmm3,%%xmm2 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x30,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "m"(kShuffleMaskARGBToRAW) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+
+ "movdqa %3,%%xmm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "pshufb %%xmm6,%%xmm0 \n"
+ "pshufb %%xmm6,%%xmm1 \n"
+ "pshufb %%xmm6,%%xmm2 \n"
+ "pshufb %%xmm6,%%xmm3 \n"
+ "movdqa %%xmm1,%%xmm4 \n"
+ "psrldq $0x4,%%xmm1 \n"
+ "pslldq $0xc,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm5 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pslldq $0x8,%%xmm5 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "psrldq $0x8,%%xmm2 \n"
+ "pslldq $0x4,%%xmm3 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "lea 0x30(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskARGBToRAW) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
-void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psrld $0x1b,%%xmm3 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrld $0x1a,%%xmm4 \n"
- "pslld $0x5,%%xmm4 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0xb,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pslld $0x8,%%xmm0 \n"
- "psrld $0x3,%%xmm1 \n"
- "psrld $0x5,%%xmm2 \n"
- "psrad $0x10,%%xmm0 \n"
- "pand %%xmm3,%%xmm1 \n"
- "pand %%xmm4,%%xmm2 \n"
- "pand %%xmm5,%%xmm0 \n"
- "por %%xmm2,%%xmm1 \n"
- "por %%xmm1,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psrld $0x1b,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrld $0x1a,%%xmm4 \n"
+ "pslld $0x5,%%xmm4 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0xb,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pslld $0x8,%%xmm0 \n"
+ "psrld $0x3,%%xmm1 \n"
+ "psrld $0x5,%%xmm2 \n"
+ "psrad $0x10,%%xmm0 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "pand %%xmm4,%%xmm2 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
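
The un-dithered 565 pack above keeps the top 5/6/5 bits of B, G and R (the three masks built with pcmpeqb plus shifts) and squeezes each dword to a 16-bit pixel with packssdw. Scalar sketch of one pixel (hypothetical helper):

#include <stdint.h>

// ARGB -> RGB565: truncate each channel to its field width.
static uint16_t ARGBToRGB565_sketch(uint8_t b, uint8_t g, uint8_t r) {
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}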
-void ARGBToRGB565DitherRow_SSE2(const uint8* src,
- uint8* dst,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
int width) {
asm volatile(
"movd %3,%%xmm6 \n"
@@ -583,9 +590,9 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src,
}
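
The body of ARGBToRGB565DitherRow_SSE2 is elided by the diff context above, but the idea of the dither4 argument is that it packs four per-column dither bytes; each is added to the 8-bit channels with saturation before truncation to 5/6/5 bits (the row functions cycle through the four bytes per output column). A scalar sketch of one pixel under that assumption (hypothetical helper):

#include <stdint.h>

// Dithered 565 pack: add the column's dither byte with clamping,
// then truncate as in the plain pack.
static uint16_t ARGBToRGB565Dither_sketch(uint8_t b, uint8_t g, uint8_t r,
                                          uint8_t d) {
  int bd = b + d > 255 ? 255 : b + d;
  int gd = g + d > 255 ? 255 : g + d;
  int rd = r + d > 255 ? 255 : r + d;
  return (uint16_t)(((rd >> 3) << 11) | ((gd >> 2) << 5) | (bd >> 3));
}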
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
-void ARGBToRGB565DitherRow_AVX2(const uint8* src,
- uint8* dst,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
int width) {
asm volatile(
"vbroadcastss %3,%%xmm6 \n"
@@ -628,75 +635,74 @@ void ARGBToRGB565DitherRow_AVX2(const uint8* src,
}
#endif // HAS_ARGBTORGB565DITHERROW_AVX2
-void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrld $0x1b,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "pslld $0x5,%%xmm5 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "pslld $0xa,%%xmm6 \n"
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "pslld $0xf,%%xmm7 \n"
+void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrld $0x1b,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "pslld $0x5,%%xmm5 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "pslld $0xa,%%xmm6 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "pslld $0xf,%%xmm7 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "psrad $0x10,%%xmm0 \n"
- "psrld $0x3,%%xmm1 \n"
- "psrld $0x6,%%xmm2 \n"
- "psrld $0x9,%%xmm3 \n"
- "pand %%xmm7,%%xmm0 \n"
- "pand %%xmm4,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm6,%%xmm3 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm3,%%xmm2 \n"
- "por %%xmm2,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :: "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "psrad $0x10,%%xmm0 \n"
+ "psrld $0x3,%%xmm1 \n"
+ "psrld $0x6,%%xmm2 \n"
+ "psrld $0x9,%%xmm3 \n"
+ "pand %%xmm7,%%xmm0 \n"
+ "pand %%xmm4,%%xmm1 \n"
+ "pand %%xmm5,%%xmm2 \n"
+ "pand %%xmm6,%%xmm3 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
}
-void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0xc,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm3 \n"
- "psrlw $0x8,%%xmm3 \n"
+void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psllw $0xc,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm3 \n"
+ "psrlw $0x8,%%xmm3 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm3,%%xmm0 \n"
- "pand %%xmm4,%%xmm1 \n"
- "psrlq $0x4,%%xmm0 \n"
- "psrlq $0x8,%%xmm1 \n"
- "por %%xmm1,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm3,%%xmm0 \n"
+ "pand %%xmm4,%%xmm1 \n"
+ "psrlq $0x4,%%xmm0 \n"
+ "psrlq $0x8,%%xmm1 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif // HAS_RGB24TOARGBROW_SSSE3
@@ -724,12 +730,16 @@ result left 10 to position the A and G channels.
// Shuffle table for converting ARGB to AR30: B and R bytes into the high byte of each word.
static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u,
128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u};
-static const uint32 kMulRB10 = 1028 * 16 * 65536 + 1028;
-static const uint32 kMaskRB10 = 0x3ff003ff;
-static const uint32 kMaskAG10 = 0xc000ff00;
-static const uint32 kMulAG10 = 64 * 65536 + 1028;
-void ARGBToAR30Row_SSSE3(const uint8* src, uint8* dst, int width) {
+static const uvec8 kShuffleBR30 = {128u, 2u, 128u, 0u, 128u, 6u, 128u, 4u,
+ 128u, 10u, 128u, 8u, 128u, 14u, 128u, 12u};
+
+static const uint32_t kMulRB10 = 1028 * 16 * 65536 + 1028;
+static const uint32_t kMaskRB10 = 0x3ff003ff;
+static const uint32_t kMaskAG10 = 0xc000ff00;
+static const uint32_t kMulAG10 = 64 * 65536 + 1028;
+
+void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
"movdqa %3,%%xmm2 \n" // shuffler for RB
"movd %4,%%xmm3 \n" // multipler for RB
@@ -768,9 +778,47 @@ void ARGBToAR30Row_SSSE3(const uint8* src, uint8* dst, int width) {
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
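
The constants above encode a fixed-point widening trick: each channel byte is pre-shuffled into the high byte of a 16-bit lane, so pmulhuw by 1028 (0x404) yields (c << 2) | (c >> 6), the byte replicated to 10 bits. kMulRB10 packs 1028 for B and 1028 * 16 for R into one dword so that, after masking with kMaskRB10, R already sits 4 bits higher; the AG path keeps only the top two alpha bits and the G byte (kMaskAG10), multiplies by kMulAG10, shifts left 10, and the final por assembles the 2:10:10:10 word. Scalar sketch of the per-pixel result (hypothetical helper):

#include <stdint.h>

// ARGB -> AR30: widen each 8-bit channel to 10 bits by bit replication
// and keep the top 2 bits of alpha.
static uint32_t ARGBToAR30_sketch(uint8_t b, uint8_t g, uint8_t r, uint8_t a) {
  uint32_t b10 = ((uint32_t)b << 2) | (b >> 6);
  uint32_t g10 = ((uint32_t)g << 2) | (g >> 6);
  uint32_t r10 = ((uint32_t)r << 2) | (r >> 6);
  return ((uint32_t)(a >> 6) << 30) | (r10 << 20) | (g10 << 10) | b10;
}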
-#ifdef HAS_ARGBTOAR30ROW_AVX2
+void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm2 \n" // shuffler for RB
+ "movd %4,%%xmm3 \n" // multipler for RB
+ "movd %5,%%xmm4 \n" // mask for R10 B10
+ "movd %6,%%xmm5 \n" // mask for AG
+ "movd %7,%%xmm6 \n" // multipler for AG
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // fetch 4 ABGR pixels
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pshufb %%xmm2,%%xmm1 \n" // R0B0
+ "pand %%xmm5,%%xmm0 \n" // A0G0
+ "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10
+ "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10
+ "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10
+ "pslld $10,%%xmm0 \n" // A2 x10 G10 x10
+ "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10
+ "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels
+ "add $0x10,%0 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleBR30), // %3 reversed shuffler
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
-void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) {
+#ifdef HAS_ARGBTOAR30ROW_AVX2
+void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB
"vbroadcastss %4,%%ymm3 \n" // multipler for RB
@@ -806,81 +854,116 @@ void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) {
}
#endif
+#ifdef HAS_ABGRTOAR30ROW_AVX2
+void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB
+ "vbroadcastss %4,%%ymm3 \n" // multipler for RB
+ "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10
+ "vbroadcastss %6,%%ymm5 \n" // mask for AG
+ "vbroadcastss %7,%%ymm6 \n" // multipler for AG
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n" // fetch 8 ABGR pixels
+ "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10
+ "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10
+ "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10
+ "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10
+ "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels
+ "add $0x20,%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleBR30), // %3 reversed shuffler
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif
+
#ifdef HAS_ARGBTOYROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
+void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBTOYROW_SSSE3
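
The kARGBToY coefficients (defined earlier in this file) are 7-bit fixed-point BT.601 studio-range weights, 13/65/33 for B/G/R in this version of the tables, and kAddY16 applies the +16 offset after the shift. A scalar sketch of the per-pixel math, truncating like psrlw above (illustrative name):

#include <stdint.h>

// ARGB -> Y, studio range: (33R + 65G + 13B) / 128 + 16.
static void ARGBToYRow_sketch(const uint8_t* src_argb, uint8_t* dst_y,
                              int width) {
  for (int x = 0; x < width; ++x) {
    uint8_t b = src_argb[4 * x + 0];
    uint8_t g = src_argb[4 * x + 1];
    uint8_t r = src_argb[4 * x + 2];
    dst_y[x] = (uint8_t)(((33 * r + 65 * g + 13 * b) >> 7) + 16);
  }
}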
#ifdef HAS_ARGBTOYJROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow, but with different coefficients, rounding instead of truncation, and no +16 bias.
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
+void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "paddw %%xmm5,%%xmm0 \n"
- "paddw %%xmm5,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToYJ), // %3
+ "m"(kAddYJ64) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBTOYJROW_SSSE3
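
The J variant produces full-range (JPEG) luma: its weights sum to 128 so no +16 offset is needed, and kAddYJ64 adds half an LSB before the shift so the result rounds instead of truncating. A sketch, assuming the usual kARGBToYJ weights of 15/75/38 for B/G/R:

#include <stdint.h>

// ARGB -> YJ, full range: (38R + 75G + 15B + 64) / 128, no bias.
static void ARGBToYJRow_sketch(const uint8_t* src_argb, uint8_t* dst_y,
                               int width) {
  for (int x = 0; x < width; ++x) {
    uint8_t b = src_argb[4 * x + 0];
    uint8_t g = src_argb[4 * x + 1];
    uint8_t r = src_argb[4 * x + 2];
    dst_y[x] = (uint8_t)((38 * r + 75 * g + 15 * b + 64) >> 7);
  }
}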
@@ -889,153 +972,149 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
- asm volatile (
- "vbroadcastf128 %3,%%ymm4 \n"
- "vbroadcastf128 %4,%%ymm5 \n"
- "vmovdqu %5,%%ymm6 \n"
+void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+ "vmovdqu %5,%%ymm6 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
- "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "lea " MEMLEA(0x80,0) ",%0 \n"
- "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
- "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
- "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
- "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
- "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
- "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16), // %4
- "m"(kPermdARGBToY_AVX) // %5
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
+ "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
+ "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToY), // %3
+ "m"(kAddY16), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ARGBTOYROW_AVX2
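
The "mutates" / "unmutate" comments above refer to vphaddw and vpackuswb operating independently within each 128-bit lane, which interleaves the two lanes' results; kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7} is the vpermd pattern that restores pixel order. A minimal intrinsics sketch of that final step:

#include <immintrin.h>

// Restore pixel order after per-lane vphaddw/vpackuswb: vpermd with
// the interleaving index vector {0,4,1,5,2,6,3,7}.
static __m256i unmutate_sketch(__m256i y_bytes) {
  const __m256i perm = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
  return _mm256_permutevar8x32_epi32(y_bytes, perm);
}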
#ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) {
- asm volatile (
- "vbroadcastf128 %3,%%ymm4 \n"
- "vbroadcastf128 %4,%%ymm5 \n"
- "vmovdqu %5,%%ymm6 \n"
+void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+ "vmovdqu %5,%%ymm6 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
- "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "lea " MEMLEA(0x80,0) ",%0 \n"
- "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
- "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
- "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding.
- "vpaddw %%ymm5,%%ymm2,%%ymm2 \n"
- "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
- "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
- "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64), // %4
- "m"(kPermdARGBToY_AVX) // %5
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
+ "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding.
+ "vpaddw %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToYJ), // %3
+ "m"(kAddYJ64), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ARGBTOYJROW_AVX2
#ifdef HAS_ARGBTOUVROW_SSSE3
-void ARGBToUVRow_SSSE3(const uint8* src_argb0,
+void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n"
- "movdqa %6,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)), // %4
- "m"(kARGBToV), // %5
- "m"(kARGBToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kARGBToV), // %5
+ "m"(kARGBToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
}
#endif // HAS_ARGBTOUVROW_SSSE3
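
ARGBToUVRow_SSSE3 subsamples 2x2: pavgb averages the two input rows, the shufps 0x88/0xdd pair splits even and odd pixels so a second pavgb averages horizontally, and the kARGBToU/kARGBToV weights plus the kAddUV128 bias produce one U and one V per 2x2 block. A scalar sketch, equivalent up to pavgb's per-step rounding (illustrative name; assumes even width):

#include <stdint.h>

// ARGB -> U/V, 2x2 subsampled, BT.601 studio-range coefficients.
static void ARGBToUVRow_sketch(const uint8_t* src_argb, int src_stride_argb,
                               uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* row1 = src_argb + src_stride_argb;
  for (int x = 0; x < width; x += 2) {
    int b = (src_argb[0] + src_argb[4] + row1[0] + row1[4] + 2) >> 2;
    int g = (src_argb[1] + src_argb[5] + row1[1] + row1[5] + 2) >> 2;
    int r = (src_argb[2] + src_argb[6] + row1[2] + row1[6] + 2) >> 2;
    *dst_u++ = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
    *dst_v++ = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
    src_argb += 8;
    row1 += 8;
  }
}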
@@ -1044,659 +1123,644 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0,
static const lvec8 kShufARGBToUV_AVX = {
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
-void ARGBToUVRow_AVX2(const uint8* src_argb0,
+void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "vbroadcastf128 %5,%%ymm5 \n"
- "vbroadcastf128 %6,%%ymm6 \n"
- "vbroadcastf128 %7,%%ymm7 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "vbroadcastf128 %5,%%ymm5 \n"
+ "vbroadcastf128 %6,%%ymm6 \n"
+ "vbroadcastf128 %7,%%ymm7 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
- VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
- VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
- VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2)
- VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3)
- "lea " MEMLEA(0x80,0) ",%0 \n"
- "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
- "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
- "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
- "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
- "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
- "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
-
- "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
- "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
- "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
- "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
- "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpsraw $0x8,%%ymm1,%%ymm1 \n"
- "vpsraw $0x8,%%ymm0,%%ymm0 \n"
- "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpshufb %8,%%ymm0,%%ymm0 \n"
- "vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
-
- "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)), // %4
- "m"(kAddUV128), // %5
- "m"(kARGBToV), // %6
- "m"(kARGBToU), // %7
- "m"(kShufARGBToUV_AVX) // %8
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
+ "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
+ "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
+ "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
+
+ "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
+ "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
+ "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
+ "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsraw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm0,%%ymm0 \n"
+ "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpshufb %8,%%ymm0,%%ymm0 \n"
+ "vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
+
+ "vextractf128 $0x0,%%ymm0,(%1) \n"
+ "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kAddUV128), // %5
+ "m"(kARGBToV), // %6
+ "m"(kARGBToU), // %7
+ "m"(kShufARGBToUV_AVX) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ARGBTOUVJROW_AVX2
-void ARGBToUVJRow_AVX2(const uint8* src_argb0,
+void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "vbroadcastf128 %5,%%ymm5 \n"
- "vbroadcastf128 %6,%%ymm6 \n"
- "vbroadcastf128 %7,%%ymm7 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "vbroadcastf128 %5,%%ymm5 \n"
+ "vbroadcastf128 %6,%%ymm6 \n"
+ "vbroadcastf128 %7,%%ymm7 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
- VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
- VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
- VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2)
- VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3)
- "lea " MEMLEA(0x80,0) ",%0 \n"
- "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
- "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
- "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
- "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
- "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
- "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
-
- "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
- "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
- "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
- "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
- "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm5,%%ymm1,%%ymm1 \n"
- "vpsraw $0x8,%%ymm1,%%ymm1 \n"
- "vpsraw $0x8,%%ymm0,%%ymm0 \n"
- "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpshufb %8,%%ymm0,%%ymm0 \n"
-
- "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)), // %4
- "m"(kAddUVJ128), // %5
- "m"(kARGBToVJ), // %6
- "m"(kARGBToUJ), // %7
- "m"(kShufARGBToUV_AVX) // %8
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
+ "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
+ "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
+ "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
+
+ "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
+ "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
+ "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
+ "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm0,%%ymm0 \n"
+ "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpshufb %8,%%ymm0,%%ymm0 \n"
+
+ "vextractf128 $0x0,%%ymm0,(%1) \n"
+ "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kAddUVJ128), // %5
+ "m"(kARGBToVJ), // %6
+ "m"(kARGBToUJ), // %7
+ "m"(kShufARGBToUV_AVX) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBTOUVJROW_AVX2
#ifdef HAS_ARGBTOUVJROW_SSSE3
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
+void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n"
- "movdqa %6,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "paddw %%xmm5,%%xmm0 \n"
- "paddw %%xmm5,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)), // %4
- "m"(kARGBToVJ), // %5
- "m"(kARGBToUJ), // %6
- "m"(kAddUVJ128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kARGBToVJ), // %5
+ "m"(kARGBToUJ), // %6
+ "m"(kAddUVJ128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
}
#endif // HAS_ARGBTOUVJROW_SSSE3
#ifdef HAS_ARGBTOUV444ROW_SSSE3
-void ARGBToUV444Row_SSSE3(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "movdqa %4,%%xmm3 \n"
- "movdqa %5,%%xmm4 \n"
- "movdqa %6,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "movdqa %4,%%xmm3 \n"
+ "movdqa %5,%%xmm4 \n"
+ "movdqa %6,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "m"(kARGBToV), // %4
- "m"(kARGBToU), // %5
- "m"(kAddUV128) // %6
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm6"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm6 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm2 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm2 \n"
+ "packsswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "pmaddubsw %%xmm3,%%xmm0 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm2 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm2 \n"
+ "packsswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "lea 0x40(%0),%0 \n"
+ "movdqu %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "m"(kARGBToV), // %4
+ "m"(kARGBToU), // %5
+ "m"(kAddUV128) // %6
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6");
}
#endif // HAS_ARGBTOUV444ROW_SSSE3
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
+void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %4,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- : "m"(kBGRAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_bgra), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kBGRAToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
-void BGRAToUVRow_SSSE3(const uint8* src_bgra0,
+void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n"
- "movdqa %6,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "sub %1,%2 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_bgra0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_bgra)), // %4
- "m"(kBGRAToV), // %5
- "m"(kBGRAToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
- );
-}
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_bgra0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_bgra)), // %4
+ "m"(kBGRAToV), // %5
+ "m"(kBGRAToU), // %6
+ "m"(kAddUV128) // %7
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
+        "xmm6", "xmm7");
+}
+
+void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %4,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- : "m"(kABGRToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kABGRToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n"
+void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %4,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- : "m"(kRGBAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_rgba), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kRGBAToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
-void ABGRToUVRow_SSSE3(const uint8* src_abgr0,
+void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n"
- "movdqa %6,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "sub %1,%2 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_abgr0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_abgr)), // %4
- "m"(kABGRToV), // %5
- "m"(kABGRToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
- );
-}
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
-void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_abgr0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_abgr)), // %4
+ "m"(kABGRToV), // %5
+ "m"(kABGRToU), // %6
+ "m"(kAddUV128) // %7
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
+        "xmm6", "xmm7");
+}
+
+void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n"
- "movdqa %6,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_rgba0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_rgba)), // %4
- "m"(kRGBAToV), // %5
- "m"(kRGBAToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_rgba0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_rgba)), // %4
+ "m"(kRGBAToV), // %5
+ "m"(kRGBAToU), // %6
+ "m"(kAddUV128) // %7
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
+        "xmm6", "xmm7");
}
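The four *ToUVRow_SSSE3 kernels above share one shape: average two source rows (pavgb against the stride-offset load), average horizontal pixel pairs (shufps $0x88/$0xdd plus pavgb), take a signed dot product with the per-format coefficient tables (pmaddubsw/phaddw), arithmetic-shift by 8, and add the 128 bias from kAddUV128. A hedged scalar model of one output sample follows; the cb/cg/cr parameters and the B,G,R byte offsets are illustrative rather than libyuv's actual table layout, and the single rounded mean below can differ from the asm's two successive rounding averages by one LSB.

#include <stdint.h>

/* Sketch of one *ToUVRow output sample: 2x2 subsample plus a signed
 * 8.8 fixed-point dot product. Assumes a 4-byte pixel with B,G,R at
 * offsets 0,1,2; the real kernels permute channels via their tables. */
static uint8_t uv_sample(const uint8_t* row0, const uint8_t* row1,
                         int cb, int cg, int cr) {
  int b = (row0[0] + row0[4] + row1[0] + row1[4] + 2) >> 2; /* 2x2 mean */
  int g = (row0[1] + row0[5] + row1[1] + row1[5] + 2) >> 2;
  int r = (row0[2] + row0[6] + row1[2] + row1[6] + 2) >> 2;
  /* pmaddubsw/phaddw dot product, arithmetic shift by 8 (psraw),
   * then the +128 bias applied by paddb kAddUV128. */
  return (uint8_t)(((b * cb + g * cg + r * cr) >> 8) + 128);
}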
#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
// Read 8 UV from 444
-#define READYUV444 \
- "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "punpcklbw %%xmm4,%%xmm4 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
+#define READYUV444 \
+ "movq (%[u_buf]),%%xmm0 \n" \
+ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422, upsample to 8 UV
-#define READYUV422 \
- "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "punpcklbw %%xmm4,%%xmm4 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
+#define READYUV422 \
+ "movd (%[u_buf]),%%xmm0 \n" \
+ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x4(%[u_buf]),%[u_buf] \n" \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422 10 bit, upsample to 8 UV
// TODO(fbarchard): Consider shufb to replace pack/unpack
// TODO(fbarchard): Consider pmulhuw to replace psraw
// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
-#define READYUV422_10 \
- "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "punpcklwd %%xmm1,%%xmm0 \n" \
- "psraw $0x2,%%xmm0 \n" \
- "packuswb %%xmm0,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n" \
- "movdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "psllw $0x6,%%xmm4 \n" \
- "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+#define READYUV210 \
+ "movq (%[u_buf]),%%xmm0 \n" \
+ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "punpcklwd %%xmm1,%%xmm0 \n" \
+ "psraw $0x2,%%xmm0 \n" \
+ "packuswb %%xmm0,%%xmm0 \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movdqu (%[y_buf]),%%xmm4 \n" \
+ "psllw $0x6,%%xmm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
-#define READYUVA422 \
- "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "punpcklbw %%xmm4,%%xmm4 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
- "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \
- "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n"
+#define READYUVA422 \
+ "movd (%[u_buf]),%%xmm0 \n" \
+ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x4(%[u_buf]),%[u_buf] \n" \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n" \
+ "movq (%[a_buf]),%%xmm5 \n" \
+ "lea 0x8(%[a_buf]),%[a_buf] \n"
// Read 4 UV from NV12, upsample to 8 UV
-#define READNV12 \
- "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
- "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
- "punpcklwd %%xmm0,%%xmm0 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "punpcklbw %%xmm4,%%xmm4 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
+#define READNV12 \
+ "movq (%[uv_buf]),%%xmm0 \n" \
+ "lea 0x8(%[uv_buf]),%[uv_buf] \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
// Read 4 VU from NV21, upsample to 8 UV
-#define READNV21 \
- "movq " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
- "lea " MEMLEA(0x8, [vu_buf]) ",%[vu_buf] \n" \
- "pshufb %[kShuffleNV21], %%xmm0 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "punpcklbw %%xmm4,%%xmm4 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
+#define READNV21 \
+ "movq (%[vu_buf]),%%xmm0 \n" \
+ "lea 0x8(%[vu_buf]),%[vu_buf] \n" \
+ "pshufb %[kShuffleNV21], %%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
// Read 4 YUY2 with 8 Y and update 4 UV to 8 UV.
-#define READYUY2 \
- "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \
- "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \
- "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \
- "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \
- "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n"
+#define READYUY2 \
+ "movdqu (%[yuy2_buf]),%%xmm4 \n" \
+ "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \
+ "movdqu (%[yuy2_buf]),%%xmm0 \n" \
+ "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \
+ "lea 0x10(%[yuy2_buf]),%[yuy2_buf] \n"
// Read 4 UYVY with 8 Y and update 4 UV to 8 UV.
-#define READUYVY \
- "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \
- "pshufb %[kShuffleUYVYY], %%xmm4 \n" \
- "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \
- "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \
- "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n"
+#define READUYVY \
+ "movdqu (%[uyvy_buf]),%%xmm4 \n" \
+ "pshufb %[kShuffleUYVYY], %%xmm4 \n" \
+ "movdqu (%[uyvy_buf]),%%xmm0 \n" \
+ "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \
+ "lea 0x10(%[uyvy_buf]),%[uyvy_buf] \n"
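All of the READ* macros above land on the same register contract for YUVTORGB: xmm0 holds 8 interleaved U/V byte pairs and xmm4 holds 8 Y samples widened into 16-bit lanes. Only the chroma addressing differs per format; a hedged scalar picture of the planar cases:

#include <stdint.h>

/* Sketch of the chroma addressing behind READYUV444 vs READYUV422:
 * 444 reads one UV per pixel; 422 reads one UV per two pixels and
 * replicates it horizontally (the punpcklwd doubling in the asm).
 * Packed formats (NV12/NV21/YUY2/UYVY) deinterleave first, then
 * follow the 422 rule. */
static void read_uv(const uint8_t* u_buf, const uint8_t* v_buf, int i,
                    int is_422, uint8_t* u, uint8_t* v) {
  int ci = is_422 ? (i >> 1) : i; /* chroma index for pixel i */
  *u = u_buf[ci];
  *v = v_buf[ci];
}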
#if defined(__x86_64__)
-#define YUVTORGB_SETUP(yuvconstants) \
- "movdqa " MEMACCESS([yuvconstants]) ",%%xmm8 \n" \
- "movdqa " MEMACCESS2(32, [yuvconstants]) ",%%xmm9 \n" \
- "movdqa " MEMACCESS2(64, [yuvconstants]) ",%%xmm10 \n" \
- "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm11 \n" \
- "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm12 \n" \
- "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm13 \n" \
- "movdqa " MEMACCESS2(192, [yuvconstants]) ",%%xmm14 \n"
+#define YUVTORGB_SETUP(yuvconstants) \
+ "movdqa (%[yuvconstants]),%%xmm8 \n" \
+ "movdqa 32(%[yuvconstants]),%%xmm9 \n" \
+ "movdqa 64(%[yuvconstants]),%%xmm10 \n" \
+ "movdqa 96(%[yuvconstants]),%%xmm11 \n" \
+ "movdqa 128(%[yuvconstants]),%%xmm12 \n" \
+ "movdqa 160(%[yuvconstants]),%%xmm13 \n" \
+ "movdqa 192(%[yuvconstants]),%%xmm14 \n"
// Convert 8 pixels: 8 UV and 8 Y
-#define YUVTORGB(yuvconstants) \
+#define YUVTORGB16(yuvconstants) \
"movdqa %%xmm0,%%xmm1 \n" \
"movdqa %%xmm0,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm3 \n" \
@@ -1712,72 +1776,95 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
"pmulhuw %%xmm14,%%xmm4 \n" \
"paddsw %%xmm4,%%xmm0 \n" \
"paddsw %%xmm4,%%xmm1 \n" \
- "paddsw %%xmm4,%%xmm2 \n" \
- "psraw $0x6,%%xmm0 \n" \
- "psraw $0x6,%%xmm1 \n" \
- "psraw $0x6,%%xmm2 \n" \
- "packuswb %%xmm0,%%xmm0 \n" \
- "packuswb %%xmm1,%%xmm1 \n" \
- "packuswb %%xmm2,%%xmm2 \n"
+ "paddsw %%xmm4,%%xmm2 \n"
#define YUVTORGB_REGS \
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
#else
#define YUVTORGB_SETUP(yuvconstants)
// Convert 8 pixels: 8 UV and 8 Y
-#define YUVTORGB(yuvconstants) \
- "movdqa %%xmm0,%%xmm1 \n" \
- "movdqa %%xmm0,%%xmm2 \n" \
- "movdqa %%xmm0,%%xmm3 \n" \
- "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \
- "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \
- "psubw %%xmm1,%%xmm0 \n" \
- "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \
- "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \
- "psubw %%xmm2,%%xmm1 \n" \
- "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
- "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
- "psubw %%xmm3,%%xmm2 \n" \
- "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \
- "paddsw %%xmm4,%%xmm0 \n" \
- "paddsw %%xmm4,%%xmm1 \n" \
- "paddsw %%xmm4,%%xmm2 \n" \
- "psraw $0x6,%%xmm0 \n" \
- "psraw $0x6,%%xmm1 \n" \
- "psraw $0x6,%%xmm2 \n" \
- "packuswb %%xmm0,%%xmm0 \n" \
- "packuswb %%xmm1,%%xmm1 \n" \
- "packuswb %%xmm2,%%xmm2 \n"
+#define YUVTORGB16(yuvconstants) \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "movdqa %%xmm0,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm3 \n" \
+ "movdqa 96(%[yuvconstants]),%%xmm0 \n" \
+ "pmaddubsw (%[yuvconstants]),%%xmm1 \n" \
+ "psubw %%xmm1,%%xmm0 \n" \
+ "movdqa 128(%[yuvconstants]),%%xmm1 \n" \
+ "pmaddubsw 32(%[yuvconstants]),%%xmm2 \n" \
+ "psubw %%xmm2,%%xmm1 \n" \
+ "movdqa 160(%[yuvconstants]),%%xmm2 \n" \
+ "pmaddubsw 64(%[yuvconstants]),%%xmm3 \n" \
+ "psubw %%xmm3,%%xmm2 \n" \
+ "pmulhuw 192(%[yuvconstants]),%%xmm4 \n" \
+ "paddsw %%xmm4,%%xmm0 \n" \
+ "paddsw %%xmm4,%%xmm1 \n" \
+ "paddsw %%xmm4,%%xmm2 \n"
#define YUVTORGB_REGS
#endif
+#define YUVTORGB(yuvconstants) \
+ YUVTORGB16(yuvconstants) \
+ "psraw $0x6,%%xmm0 \n" \
+ "psraw $0x6,%%xmm1 \n" \
+ "psraw $0x6,%%xmm2 \n" \
+ "packuswb %%xmm0,%%xmm0 \n" \
+ "packuswb %%xmm1,%%xmm1 \n" \
+ "packuswb %%xmm2,%%xmm2 \n"
+
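The split into YUVTORGB16 plus a thin YUVTORGB wrapper is what enables the AR30 additions below: the 16-bit intermediates keep 6 fractional bits, so they can be shifted down by 6 and packed to 8 bits, or down by 4 for a 10-bit store. A hedged scalar model, shaped after libyuv's scalar YuvPixel reference in row_common.cc; the constants are the illustrative BT.601 set, while the asm loads whatever the YuvConstants block supplies (UV weights at offsets 0/32/64, biases at 96/128/160, Y gain at 192):

#include <stdint.h>

enum {
  YG = 18997,  /* round(1.164 * 64 * 256 * 256 / 257) */
  YGB = -1160, /* 1.164 * 64 * -16 + 64 / 2 */
  UB = -128, UG = 25, VG = 52, VR = -102, /* round(coef * 64) */
  BB = UB * 128 + YGB,
  BG = UG * 128 + VG * 128 + YGB,
  BR = VR * 128 + YGB
};

static uint8_t clamp8(int32_t v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* YUVTORGB16 computes 16-bit intermediates with 6 fractional bits;
 * YUVTORGB is the psraw $6 + packuswb shift-and-saturate on top. */
static void yuv_pixel(uint8_t y, uint8_t u, uint8_t v,
                      uint8_t* b, uint8_t* g, uint8_t* r) {
  int32_t y1 = (int32_t)(((uint32_t)y * 0x0101 * YG) >> 16); /* pmulhuw */
  *b = clamp8((-(u * UB) + y1 + BB) >> 6);
  *g = clamp8((-(u * UG + v * VG) + y1 + BG) >> 6);
  *r = clamp8((-(v * VR) + y1 + BR) >> 6);
}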
// Store 8 ARGB values.
-#define STOREARGB \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklbw %%xmm5,%%xmm2 \n" \
- "movdqa %%xmm0,%%xmm1 \n" \
- "punpcklwd %%xmm2,%%xmm0 \n" \
- "punpckhwd %%xmm2,%%xmm1 \n" \
- "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \
- "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \
- "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n"
+#define STOREARGB \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklbw %%xmm5,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "punpcklwd %%xmm2,%%xmm0 \n" \
+ "punpckhwd %%xmm2,%%xmm1 \n" \
+ "movdqu %%xmm0,(%[dst_argb]) \n" \
+ "movdqu %%xmm1,0x10(%[dst_argb]) \n" \
+ "lea 0x20(%[dst_argb]), %[dst_argb] \n"
// Store 8 RGBA values.
-#define STORERGBA \
- "pcmpeqb %%xmm5,%%xmm5 \n" \
- "punpcklbw %%xmm2,%%xmm1 \n" \
- "punpcklbw %%xmm0,%%xmm5 \n" \
- "movdqa %%xmm5,%%xmm0 \n" \
- "punpcklwd %%xmm1,%%xmm5 \n" \
- "punpckhwd %%xmm1,%%xmm0 \n" \
- "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \
- "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
- "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
-
-void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+#define STORERGBA \
+ "pcmpeqb %%xmm5,%%xmm5 \n" \
+ "punpcklbw %%xmm2,%%xmm1 \n" \
+ "punpcklbw %%xmm0,%%xmm5 \n" \
+ "movdqa %%xmm5,%%xmm0 \n" \
+ "punpcklwd %%xmm1,%%xmm5 \n" \
+ "punpckhwd %%xmm1,%%xmm0 \n" \
+ "movdqu %%xmm5,(%[dst_rgba]) \n" \
+ "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \
+ "lea 0x20(%[dst_rgba]),%[dst_rgba] \n"
+
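For orientation: STOREARGB and STORERGBA differ only in interleave order. libyuv's "ARGB" is byte order B,G,R,A in memory (the little-endian word 0xAARRGGBB); "RGBA" is A,B,G,R. A minimal sketch of the ARGB case, assuming a little-endian host as on the x86 targets of this file:

#include <stdint.h>
#include <string.h>

/* Pack one libyuv-"ARGB" pixel: bytes land as B,G,R,A, which is what
 * STOREARGB produces by interleaving B/G and R/A then weaving words. */
static void store_argb(uint8_t* dst, uint8_t b, uint8_t g, uint8_t r,
                       uint8_t a) {
  uint32_t px = (uint32_t)b | ((uint32_t)g << 8) |
                ((uint32_t)r << 16) | ((uint32_t)a << 24);
  memcpy(dst, &px, 4); /* native little-endian write assumed */
}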
+// Store 8 AR30 values.
+#define STOREAR30 \
+ "psraw $0x4,%%xmm0 \n" \
+ "psraw $0x4,%%xmm1 \n" \
+ "psraw $0x4,%%xmm2 \n" \
+ "pminsw %%xmm7,%%xmm0 \n" \
+ "pminsw %%xmm7,%%xmm1 \n" \
+ "pminsw %%xmm7,%%xmm2 \n" \
+ "pmaxsw %%xmm6,%%xmm0 \n" \
+ "pmaxsw %%xmm6,%%xmm1 \n" \
+ "pmaxsw %%xmm6,%%xmm2 \n" \
+ "psllw $0x4,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm3 \n" \
+ "punpcklwd %%xmm2,%%xmm0 \n" \
+ "punpckhwd %%xmm2,%%xmm3 \n" \
+ "movdqa %%xmm1,%%xmm2 \n" \
+ "punpcklwd %%xmm5,%%xmm1 \n" \
+ "punpckhwd %%xmm5,%%xmm2 \n" \
+ "pslld $0xa,%%xmm1 \n" \
+ "pslld $0xa,%%xmm2 \n" \
+ "por %%xmm1,%%xmm0 \n" \
+ "por %%xmm2,%%xmm3 \n" \
+ "movdqu %%xmm0,(%[dst_ar30]) \n" \
+ "movdqu %%xmm3,0x10(%[dst_ar30]) \n" \
+ "lea 0x20(%[dst_ar30]), %[dst_ar30] \n"
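STOREAR30 shifts the 6-fractional-bit intermediates down by 4 instead of 6, leaving 10-bit channels, clamps them to [0, 1023] with pminsw/pmaxsw, and packs 2:10:10:10 words; following the interleaves, each output dword is (alpha << 30) | (R << 20) | (G << 10) | B with a constant opaque 2-bit alpha. A hedged scalar equivalent:

#include <stdint.h>

static int clamp10(int v) { return v < 0 ? 0 : (v > 1023 ? 1023 : v); }

/* b16/g16/r16 are the YUVTORGB16 intermediates (6 fractional bits);
 * >>4 keeps 10-bit channels, matching psraw $4 + pminsw/pmaxsw. */
static uint32_t pack_ar30(int b16, int g16, int r16) {
  uint32_t b = (uint32_t)clamp10(b16 >> 4);
  uint32_t g = (uint32_t)clamp10(g16 >> 4);
  uint32_t r = (uint32_t)clamp10(r16 >> 4);
  return (3u << 30) | (r << 20) | (g << 10) | b; /* opaque alpha */
}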
+
+void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -1798,15 +1885,15 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
-void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgb24,
+void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -1827,9 +1914,9 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
"pshufb %%xmm5,%%xmm0 \n"
"pshufb %%xmm6,%%xmm1 \n"
"palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
- "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
+ "movq %%xmm0,(%[dst_rgb24]) \n"
+ "movdqu %%xmm1,0x8(%[dst_rgb24]) \n"
+ "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n"
"subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
@@ -1844,15 +1931,15 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
[kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
- : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
);
}
-void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -1873,16 +1960,50 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
+void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants
+ "psrlw $14,%%xmm5 \n"
+ "psllw $4,%%xmm5 \n" // 2 alpha bits
+      "pxor %%xmm6,%%xmm6                        \n"  // 0 for min
+      "pcmpeqb %%xmm7,%%xmm7                     \n"
+      "psrlw $6,%%xmm7                           \n"  // 1023 for max
+
+ LABELALIGN
+ "1: \n"
+ READYUV422
+ YUVTORGB16(yuvconstants)
+ STOREAR30
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+
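A hedged caller sketch for the new AR30 row kernel: the real entry points in convert_argb.cc add CPU dispatch, odd-width fallback, and negative-stride handling that this loop omits; width is assumed to be a multiple of 8, the kernel is assumed declared in libyuv/row.h like its siblings, and the frame-level helper name is hypothetical.

#include "libyuv/row.h"

/* Hypothetical frame driver for a 4:2:2 source: chroma rows advance
 * every output row; a 4:2:0 source would advance u/v every second
 * row instead. */
static void I422ToAR30Frame(const uint8_t* y, int y_stride,
                            const uint8_t* u, int u_stride,
                            const uint8_t* v, int v_stride,
                            uint8_t* ar30, int ar30_stride,
                            const struct YuvConstants* constants,
                            int width, int height) {
  for (int row = 0; row < height; ++row) {
    I422ToAR30Row_SSSE3(y, u, v, ar30, constants, width);
    y += y_stride;
    u += u_stride;
    v += v_stride;
    ar30 += ar30_stride;
  }
}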
// 10 bit YUV to ARGB
-void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf,
- const uint16* u_buf,
- const uint16* v_buf,
- uint8* dst_argb,
+void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -1892,7 +2013,7 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf,
LABELALIGN
"1: \n"
- READYUV422_10
+ READYUV210
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
@@ -1903,17 +2024,52 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
+// 10 bit YUV to AR30
+void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $14,%%xmm5 \n"
+ "psllw $4,%%xmm5 \n" // 2 alpha bits
+      "pxor %%xmm6,%%xmm6                        \n"  // 0 for min
+      "pcmpeqb %%xmm7,%%xmm7                     \n"
+      "psrlw $6,%%xmm7                           \n"  // 1023 for max
+
+ LABELALIGN
+ "1: \n"
+ READYUV210
+ YUVTORGB16(yuvconstants)
+ STOREAR30
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+
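READYUV210's two shifts keep the 10-bit path consistent with the 8-bit one: chroma narrows by 2 into the byte domain the pmaddubsw coefficients expect, while luma widens by 6 so pmulhuw sees roughly the magnitude the 8-bit path gets from byte doubling (y8 * 0x0101). A hedged numeric check:

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint16_t y10 = 940; /* 10-bit white = 235 << 2 */
  uint16_t u10 = 512; /* 10-bit mid chroma */
  uint16_t y16_from_10 = (uint16_t)(y10 << 6);    /* psllw $6 */
  uint16_t y16_from_8 = (uint16_t)(235 * 0x0101); /* punpcklbw doubling */
  uint8_t u8 = (uint8_t)(u10 >> 2);               /* psraw $2 + packuswb */
  assert(u8 == 128);
  /* 60160 vs 60395: equal to within one Y LSB after the >>16 gain. */
  assert(y16_from_10 == 60160 && y16_from_8 == 60395);
  return 0;
}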
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
-void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -1939,16 +2095,16 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
#endif // HAS_I422ALPHATOARGBROW_SSSE3
-void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
+void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -1968,15 +2124,15 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
-void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* vu_buf,
- uint8* dst_argb,
+void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -1997,14 +2153,14 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleNV21]"m"(kShuffleNV21)
- : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
-void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
- uint8* dst_argb,
+void OMITFP YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -2025,14 +2181,14 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleYUY2Y]"m"(kShuffleYUY2Y),
[kShuffleYUY2UV]"m"(kShuffleYUY2UV)
- : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
-void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
- uint8* dst_argb,
+void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -2053,16 +2209,16 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleUYVYY]"m"(kShuffleUYVYY),
[kShuffleUYVYUV]"m"(kShuffleUYVYUV)
- : "memory", "cc", YUVTORGB_REGS // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
-void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
+void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -2083,7 +2239,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
[dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS
+ : "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
@@ -2091,96 +2247,113 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
#endif // HAS_I422TOARGBROW_SSSE3
// Read 16 UV from 444
-#define READYUV444_AVX2 \
- "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
- "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
- "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
- "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
- "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+#define READYUV444_AVX2 \
+ "vmovdqu (%[u_buf]),%%xmm0 \n" \
+ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x10(%[u_buf]),%[u_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 422, upsample to 16 UV.
-#define READYUV422_AVX2 \
- "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
- "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
- "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
- "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
- "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+#define READYUV422_AVX2 \
+ "vmovq (%[u_buf]),%%xmm0 \n" \
+ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
+
+// Read 8 UV from 210 10 bit, upsample to 16 UV
+// TODO(fbarchard): Consider vshufb to replace pack/unpack
+// TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1.
+#define READYUV210_AVX2 \
+ "vmovdqu (%[u_buf]),%%xmm0 \n" \
+ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x10(%[u_buf]),%[u_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
+ "vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x2,%%ymm0,%%ymm0 \n" \
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%ymm4 \n" \
+ "vpsllw $0x6,%%ymm4,%%ymm4 \n" \
+ "lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 422, upsample to 16 UV. With 16 Alpha.
-#define READYUVA422_AVX2 \
- "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
- "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
- "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
- "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
- "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
- "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \
- "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
- "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
+#define READYUVA422_AVX2 \
+ "vmovq (%[u_buf]),%%xmm0 \n" \
+ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n" \
+ "vmovdqu (%[a_buf]),%%xmm5 \n" \
+ "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
+ "lea 0x10(%[a_buf]),%[a_buf] \n"
// Read 8 UV from NV12, upsample to 16 UV.
-#define READNV12_AVX2 \
- "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
- "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
- "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
- "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
- "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
- "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+#define READNV12_AVX2 \
+ "vmovdqu (%[uv_buf]),%%xmm0 \n" \
+ "lea 0x10(%[uv_buf]),%[uv_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 VU from NV21, upsample to 16 UV.
-#define READNV21_AVX2 \
- "vmovdqu " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
- "lea " MEMLEA(0x10, [vu_buf]) ",%[vu_buf] \n" \
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
- "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \
- "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
- "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
- "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
- "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
+#define READNV21_AVX2 \
+ "vmovdqu (%[vu_buf]),%%xmm0 \n" \
+ "lea 0x10(%[vu_buf]),%[vu_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
-#define READYUY2_AVX2 \
- "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm4 \n" \
- "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \
- "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm0 \n" \
- "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \
- "lea " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf] \n"
+#define READYUY2_AVX2 \
+ "vmovdqu (%[yuy2_buf]),%%ymm4 \n" \
+ "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \
+ "vmovdqu (%[yuy2_buf]),%%ymm0 \n" \
+ "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \
+ "lea 0x20(%[yuy2_buf]),%[yuy2_buf] \n"
// Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV.
-#define READUYVY_AVX2 \
- "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm4 \n" \
- "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \
- "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm0 \n" \
- "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \
- "lea " MEMLEA(0x20, [uyvy_buf]) ",%[uyvy_buf] \n"
+#define READUYVY_AVX2 \
+ "vmovdqu (%[uyvy_buf]),%%ymm4 \n" \
+ "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \
+ "vmovdqu (%[uyvy_buf]),%%ymm0 \n" \
+ "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \
+ "lea 0x20(%[uyvy_buf]),%[uyvy_buf] \n"
#if defined(__x86_64__)
-#define YUVTORGB_SETUP_AVX2(yuvconstants) \
- "vmovdqa " MEMACCESS([yuvconstants]) ",%%ymm8 \n" \
- "vmovdqa " MEMACCESS2(32, [yuvconstants]) ",%%ymm9 \n" \
- "vmovdqa " MEMACCESS2(64, [yuvconstants]) ",%%ymm10 \n" \
- "vmovdqa " MEMACCESS2(96, [yuvconstants]) ",%%ymm11 \n" \
- "vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" \
- "vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" \
- "vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n"
-
-#define YUVTORGB_AVX2(yuvconstants) \
+#define YUVTORGB_SETUP_AVX2(yuvconstants) \
+ "vmovdqa (%[yuvconstants]),%%ymm8 \n" \
+ "vmovdqa 32(%[yuvconstants]),%%ymm9 \n" \
+ "vmovdqa 64(%[yuvconstants]),%%ymm10 \n" \
+ "vmovdqa 96(%[yuvconstants]),%%ymm11 \n" \
+ "vmovdqa 128(%[yuvconstants]),%%ymm12 \n" \
+ "vmovdqa 160(%[yuvconstants]),%%ymm13 \n" \
+ "vmovdqa 192(%[yuvconstants]),%%ymm14 \n"
+
+#define YUVTORGB16_AVX2(yuvconstants) \
"vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \
"vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \
"vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \
@@ -2190,13 +2363,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
"vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \
"vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \
"vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \
- "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \
- "vpsraw $0x6,%%ymm0,%%ymm0 \n" \
- "vpsraw $0x6,%%ymm1,%%ymm1 \n" \
- "vpsraw $0x6,%%ymm2,%%ymm2 \n" \
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
- "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
+ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n"
#define YUVTORGB_REGS_AVX2 \
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
@@ -2204,48 +2371,78 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
#else
#define YUVTORGB_SETUP_AVX2(yuvconstants)
// Convert 16 pixels: 16 UV and 16 Y.
-#define YUVTORGB_AVX2(yuvconstants) \
- "vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \
- "vpmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%ymm0,%%ymm1 \n" \
- "vpmaddubsw " MEMACCESS([yuvconstants]) ",%%ymm0,%%ymm0 \n" \
- "vmovdqu " MEMACCESS2(160, [yuvconstants]) ",%%ymm3 \n" \
- "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
- "vmovdqu " MEMACCESS2(128, [yuvconstants]) ",%%ymm3 \n" \
- "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \
- "vmovdqu " MEMACCESS2(96, [yuvconstants]) ",%%ymm3 \n" \
- "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \
- "vpmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%ymm4,%%ymm4 \n" \
- "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \
- "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \
- "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \
- "vpsraw $0x6,%%ymm0,%%ymm0 \n" \
- "vpsraw $0x6,%%ymm1,%%ymm1 \n" \
- "vpsraw $0x6,%%ymm2,%%ymm2 \n" \
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
- "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
+#define YUVTORGB16_AVX2(yuvconstants) \
+ "vpmaddubsw 64(%[yuvconstants]),%%ymm0,%%ymm2 \n" \
+ "vpmaddubsw 32(%[yuvconstants]),%%ymm0,%%ymm1 \n" \
+ "vpmaddubsw (%[yuvconstants]),%%ymm0,%%ymm0 \n" \
+ "vmovdqu 160(%[yuvconstants]),%%ymm3 \n" \
+ "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
+ "vmovdqu 128(%[yuvconstants]),%%ymm3 \n" \
+ "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \
+ "vmovdqu 96(%[yuvconstants]),%%ymm3 \n" \
+ "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \
+ "vpmulhuw 192(%[yuvconstants]),%%ymm4,%%ymm4 \n" \
+ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \
+ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \
+ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n"
#define YUVTORGB_REGS_AVX2
#endif
+#define YUVTORGB_AVX2(yuvconstants) \
+ YUVTORGB16_AVX2(yuvconstants) \
+ "vpsraw $0x6,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x6,%%ymm1,%%ymm1 \n" \
+ "vpsraw $0x6,%%ymm2,%%ymm2 \n" \
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
+ "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
+
// Store 16 ARGB values.
-#define STOREARGB_AVX2 \
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
- "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \
- "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
- "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \
- "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \
- "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \
- "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \
- "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n"
+#define STOREARGB_AVX2 \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
+ "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \
+ "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \
+ "vmovdqu %%ymm1,(%[dst_argb]) \n" \
+ "vmovdqu %%ymm0,0x20(%[dst_argb]) \n" \
+ "lea 0x40(%[dst_argb]), %[dst_argb] \n"
+
+// Store 16 AR30 values.
+#define STOREAR30_AVX2 \
+ "vpsraw $0x4,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x4,%%ymm1,%%ymm1 \n" \
+ "vpsraw $0x4,%%ymm2,%%ymm2 \n" \
+ "vpminsw %%ymm7,%%ymm0,%%ymm0 \n" \
+ "vpminsw %%ymm7,%%ymm1,%%ymm1 \n" \
+ "vpminsw %%ymm7,%%ymm2,%%ymm2 \n" \
+ "vpmaxsw %%ymm6,%%ymm0,%%ymm0 \n" \
+ "vpmaxsw %%ymm6,%%ymm1,%%ymm1 \n" \
+ "vpmaxsw %%ymm6,%%ymm2,%%ymm2 \n" \
+ "vpsllw $0x4,%%ymm2,%%ymm2 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
+ "vpunpckhwd %%ymm2,%%ymm0,%%ymm3 \n" \
+ "vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \
+ "vpunpckhwd %%ymm5,%%ymm1,%%ymm2 \n" \
+ "vpunpcklwd %%ymm5,%%ymm1,%%ymm1 \n" \
+ "vpslld $0xa,%%ymm1,%%ymm1 \n" \
+ "vpslld $0xa,%%ymm2,%%ymm2 \n" \
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpor %%ymm2,%%ymm3,%%ymm3 \n" \
+ "vmovdqu %%ymm0,(%[dst_ar30]) \n" \
+ "vmovdqu %%ymm3,0x20(%[dst_ar30]) \n" \
+ "lea 0x40(%[dst_ar30]), %[dst_ar30] \n"
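The recurring vpermq $0xd8 in these AVX2 macros compensates for vpunpckl/vpunpckh operating within 128-bit lanes: permuting 64-bit quarters into 0,2,1,3 order first makes the per-lane unpack come out in linear pixel order. A hedged intrinsics sketch of the idiom:

#include <immintrin.h>

/* a and b each carry 16 meaningful bytes in their low 128 bits (as
 * after a vmovdqu xmm load); the result is their 32-byte interleave
 * in linear order, the vpermq $0xd8 + vpunpcklbw pattern used above. */
static __m256i interleave_bytes_linear(__m256i a, __m256i b) {
  a = _mm256_permute4x64_epi64(a, 0xd8); /* quarters 0,2,1,3 */
  b = _mm256_permute4x64_epi64(b, 0xd8);
  return _mm256_unpacklo_epi8(a, b);     /* per-lane unpack, now ordered */
}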
#ifdef HAS_I444TOARGBROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
-void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -2267,7 +2464,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
@@ -2276,10 +2473,10 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
#if defined(HAS_I422TOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -2302,20 +2499,135 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I422TOARGBROW_AVX2
+#if defined(HAS_I422TOAR30ROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
+void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
+ "vpsrlw $14,%%ymm5,%%ymm5 \n"
+ "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
+ "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
+ "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
+ "vpsrlw $6,%%ymm7,%%ymm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV422_AVX2
+ YUVTORGB16_AVX2(yuvconstants)
+ STOREAR30_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // HAS_I422TOAR30ROW_AVX2
+
+#if defined(HAS_I210TOARGBROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV210_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I210TOARGBROW_AVX2
+
+#if defined(HAS_I210TOAR30ROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
+void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
+ "vpsrlw $14,%%ymm5,%%ymm5 \n"
+ "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
+ "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
+ "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
+ "vpsrlw $6,%%ymm7,%%ymm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV210_AVX2
+ YUVTORGB16_AVX2(yuvconstants)
+ STOREAR30_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // HAS_I210TOAR30ROW_AVX2
+
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
-void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -2342,7 +2654,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
@@ -2352,10 +2664,10 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
#if defined(HAS_I422TORGBAROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
-void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -2375,11 +2687,11 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
"vpermq $0xd8,%%ymm2,%%ymm2 \n"
"vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n"
"vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n"
- "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n"
- "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x10,%[width] \n"
- "jg 1b \n"
+ "vmovdqu %%ymm0,(%[dst_argb]) \n"
+ "vmovdqu %%ymm1,0x20(%[dst_argb]) \n"
+ "lea 0x40(%[dst_argb]),%[dst_argb] \n"
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
@@ -2387,7 +2699,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
@@ -2396,9 +2708,9 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
#if defined(HAS_NV12TOARGBROW_AVX2)
// 16 pixels.
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
+void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -2419,7 +2731,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
- : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
@@ -2429,9 +2741,9 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
#if defined(HAS_NV21TOARGBROW_AVX2)
// 16 pixels.
// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* vu_buf,
- uint8* dst_argb,
+void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -2453,7 +2765,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleNV21]"m"(kShuffleNV21)
- : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
@@ -2463,8 +2775,8 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
#if defined(HAS_YUY2TOARGBROW_AVX2)
// 16 pixels.
// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
-void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
- uint8* dst_argb,
+void OMITFP YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -2486,7 +2798,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleYUY2Y]"m"(kShuffleYUY2Y),
[kShuffleYUY2UV]"m"(kShuffleYUY2UV)
- : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
@@ -2496,8 +2808,8 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
#if defined(HAS_UYVYTOARGBROW_AVX2)
// 16 pixels.
// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
-void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf,
- uint8* dst_argb,
+void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
@@ -2519,7 +2831,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf,
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleUYVYY]"m"(kShuffleUYVYY),
[kShuffleUYVYUV]"m"(kShuffleUYVYUV)
- : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ : "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
@@ -2527,95 +2839,93 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf,
#endif // HAS_UYVYTOARGBROW_AVX2
#ifdef HAS_I400TOARGBROW_SSE2
-void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) {
- asm volatile (
- "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
- "movd %%eax,%%xmm2 \n"
- "pshufd $0x0,%%xmm2,%%xmm2 \n"
- "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * 16
- "movd %%eax,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
+void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
+ "movd %%eax,%%xmm2 \n"
+ "pshufd $0x0,%%xmm2,%%xmm2 \n"
+      "mov $0x04880488,%%eax                     \n"  // 0488 = 1160 = 1.164 * 16
+ "movd %%eax,%%xmm3 \n"
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "pslld $0x18,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "psubusw %%xmm3,%%xmm0 \n"
- "psrlw $6, %%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
-
- // Step 2: Weave into ARGB
- "punpcklbw %%xmm0,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n"
- "punpckhwd %%xmm1,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "por %%xmm4,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
-
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(y_buf), // %0
- "+r"(dst_argb), // %1
- "+rm"(width) // %2
- :
- : "memory", "cc", "eax"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
+ LABELALIGN
+ "1: \n"
+ // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
+ "movq (%0),%%xmm0 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "psubusw %%xmm3,%%xmm0 \n"
+ "psrlw $6, %%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+
+ // Step 2: Weave into ARGB
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm0,%%xmm0 \n"
+ "punpckhwd %%xmm1,%%xmm1 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "por %%xmm4,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(y_buf), // %0
+ "+r"(dst_argb), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif // HAS_I400TOARGBROW_SSE2
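The I400 constants decode as fixed-point 1.164 * (Y - 16): byte doubling gives y * 0x0101, pmulhuw by 0x4a35 (18997) applies the gain, psubusw 0x0488 (1160) removes the black offset with a rounding term folded in, and psrlw $6 drops the fractional bits. A hedged scalar model:

#include <stdint.h>

/* G = 1.164 * (y - 16), computed as the SSE2/AVX2 code does. */
static uint32_t i400_pixel(uint8_t y) {
  uint32_t y16 = (uint32_t)y * 0x0101;  /* punpcklbw doubling */
  uint32_t g = (y16 * 18997) >> 16;     /* pmulhuw by 0x4a35 */
  g = g > 1160 ? g - 1160 : 0;          /* psubusw: saturates at 0 */
  g >>= 6;                              /* psrlw $6 */
  if (g > 255) g = 255;                 /* packuswb: saturates at 255 */
  return 0xff000000u | (g << 16) | (g << 8) | g; /* gray + alpha */
}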
#ifdef HAS_I400TOARGBROW_AVX2
// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
// note: vpunpcklbw mutates and vpackuswb unmutates.
-void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) {
- asm volatile (
- "mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 * 16
- "vmovd %%eax,%%xmm2 \n"
- "vbroadcastss %%xmm2,%%ymm2 \n"
- "mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164
- "vmovd %%eax,%%xmm3 \n"
- "vbroadcastss %%xmm3,%%ymm3 \n"
- "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
- "vpslld $0x18,%%ymm4,%%ymm4 \n"
+void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
+ asm volatile(
+      "mov $0x4a354a35,%%eax                     \n"  // 4a35 = 18997 = 1.164
+ "vmovd %%eax,%%xmm2 \n"
+ "vbroadcastss %%xmm2,%%ymm2 \n"
+      "mov $0x4880488,%%eax                      \n"  // 0488 = 1160 = 1.164 * 16
+ "vmovd %%eax,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpslld $0x18,%%ymm4,%%ymm4 \n"
- LABELALIGN
- "1: \n"
- // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
- "vmovdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x6,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n"
- "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n"
- "vpor %%ymm4,%%ymm0,%%ymm0 \n"
- "vpor %%ymm4,%%ymm1,%%ymm1 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(y_buf), // %0
- "+r"(dst_argb), // %1
- "+rm"(width) // %2
- :
- : "memory", "cc", "eax"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
+ LABELALIGN
+ "1: \n"
+ // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
+ "vmovdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x6,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n"
+ "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm4,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(y_buf), // %0
+ "+r"(dst_argb), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif // HAS_I400TOARGBROW_AVX2
@@ -2624,52 +2934,50 @@ void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) {
static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "movdqa %3,%%xmm5 \n"
+ asm volatile(
- LABELALIGN
- "1: \n"
- MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
- "pshufb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kShuffleMirror) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm5"
- );
+ "movdqa %3,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu -0x10(%0,%2,1),%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ : "m"(kShuffleMirror) // %3
+ : "memory", "cc", "xmm0", "xmm5");
}
#endif // HAS_MIRRORROW_SSSE3
#ifdef HAS_MIRRORROW_AVX2
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "vbroadcastf128 %3,%%ymm5 \n"
+ asm volatile(
- LABELALIGN
- "1: \n"
- MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0
- "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
- "vpermq $0x4e,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kShuffleMirror) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm5"
- );
+ "vbroadcastf128 %3,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu -0x20(%0,%2,1),%%ymm0 \n"
+ "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpermq $0x4e,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ : "m"(kShuffleMirror) // %3
+ : "memory", "cc", "xmm0", "xmm5");
}
#endif // HAS_MIRRORROW_AVX2
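// [Editorial sketch, not part of this commit.] Both mirror kernels index from
// the end of the row and reverse byte order with a pshufb shuffle table; the
// AVX2 variant needs the extra vpermq $0x4e because vpshufb only shuffles
// within 128-bit lanes, so the two lanes must be swapped afterwards. Scalar
// model (hypothetical name; assumes <stdint.h>):
void MirrorRow_sketch(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) {
    dst[x] = src[width - 1 - x];  // reverse the row
  }
}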
@@ -2677,231 +2985,221 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
// Shuffle table for reversing the bytes of UV channels.
static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
-void MirrorUVRow_SSSE3(const uint8* src,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorUVRow_SSSE3(const uint8_t* src,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "movdqa %4,%%xmm1 \n"
- "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "movdqa %4,%%xmm1 \n"
+ "lea -0x10(%0,%3,2),%0 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n"
- "pshufb %%xmm1,%%xmm0 \n"
- "movlpd %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $8,%3 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(temp_width) // %3
- : "m"(kShuffleMirrorUV) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea -0x10(%0),%0 \n"
+ "pshufb %%xmm1,%%xmm0 \n"
+ "movlpd %%xmm0,(%1) \n"
+ "movhpd %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $8,%3 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(temp_width) // %3
+ : "m"(kShuffleMirrorUV) // %4
+ : "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_MIRRORUVROW_SSSE3
#ifdef HAS_ARGBMIRRORROW_SSE2
-void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
+void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n"
+ asm volatile(
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "pshufd $0x1b,%%xmm0,%%xmm0 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- :
- : "memory", "cc"
- , "xmm0"
- );
+ "lea -0x10(%0,%2,4),%0 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "pshufd $0x1b,%%xmm0,%%xmm0 \n"
+ "lea -0x10(%0),%0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ :
+ : "memory", "cc", "xmm0");
}
#endif // HAS_ARGBMIRRORROW_SSE2
#ifdef HAS_ARGBMIRRORROW_AVX2
// Shuffle table for reversing the bytes.
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
+void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "vmovdqu %3,%%ymm5 \n"
+ asm volatile(
- LABELALIGN
- "1: \n"
- VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kARGBShuffleMirror_AVX2) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm5"
- );
+ "vmovdqu %3,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpermd -0x20(%0,%2,4),%%ymm5,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ : "m"(kARGBShuffleMirror_AVX2) // %3
+ : "memory", "cc", "xmm0", "xmm5");
}
#endif // HAS_ARGBMIRRORROW_AVX2
#ifdef HAS_SPLITUVROW_AVX2
-void SplitUVRow_AVX2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_AVX2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm2 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm3 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- MEMOPMEM(vmovdqu,ymm2,0x00,1,2,1) // vmovdqu %%ymm2,(%1,%2)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm2 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm3 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm2,0x00(%1,%2,1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_SPLITUVROW_AVX2
#ifdef HAS_SPLITUVROW_SSE2
-void SplitUVRow_SSE2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_SSE2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movdqu,xmm2,0x00,1,2,1) // movdqu %%xmm2,(%1,%2)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm2,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_SPLITUVROW_SSE2
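// [Editorial sketch, not part of this commit.] The SplitUV kernels above
// deinterleave by masking even bytes (pand with 0x00ff words) and shifting
// out odd bytes (psrlw $8) before packing; the "sub %1,%2" prologue turns
// dst_v into an offset from dst_u so a single register advances both outputs.
// Scalar model (hypothetical name; assumes <stdint.h>):
void SplitUVRow_sketch(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                       int width) {
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[2 * x + 0];  // even bytes -> U plane
    dst_v[x] = src_uv[2 * x + 1];  // odd bytes  -> V plane
  }
}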
#ifdef HAS_MERGEUVROW_AVX2
-void MergeUVRow_AVX2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_AVX2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
- asm volatile (
- "sub %0,%1 \n"
+ asm volatile(
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
- "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n"
- "vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n"
- "vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n"
- "vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2"
- );
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x00(%0,%1,1),%%ymm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
+ "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm2,(%2) \n"
+ "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
+ "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
+ "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MERGEUVROW_AVX2
#ifdef HAS_MERGEUVROW_SSE2
-void MergeUVRow_SSE2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_SSE2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
- asm volatile (
- "sub %0,%1 \n"
+ asm volatile(
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm2 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2"
- );
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm2 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "movdqu %%xmm2,0x10(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MERGEUVROW_SSE2
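// [Editorial sketch, not part of this commit.] MergeUV is the inverse
// interleave; the AVX2 version stores four 16-byte halves with vextractf128
// so the output stays in pixel order even though vpunpcklbw/vpunpckhbw
// interleave per 128-bit lane. Scalar model (hypothetical name; assumes
// <stdint.h>):
void MergeUVRow_sketch(const uint8_t* src_u, const uint8_t* src_v,
                       uint8_t* dst_uv, int width) {
  for (int x = 0; x < width; ++x) {
    dst_uv[2 * x + 0] = src_u[x];
    dst_uv[2 * x + 1] = src_v[x];
  }
}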
@@ -2911,9 +3209,9 @@ void MergeUVRow_SSE2(const uint8* src_u,
// 16 = 12 bits
// 1 = 16 bits
#ifdef HAS_MERGEUVROW_16_AVX2
-void MergeUVRow_16_AVX2(const uint16* src_u,
- const uint16* src_v,
- uint16* dst_uv,
+void MergeUVRow_16_AVX2(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
int scale,
int width) {
// clang-format off
@@ -2958,8 +3256,8 @@ void MergeUVRow_16_AVX2(const uint16* src_u,
// 16 = 12 bits
// 1 = 16 bits
#ifdef HAS_MULTIPLYROW_16_AVX2
-void MultiplyRow_16_AVX2(const uint16* src_y,
- uint16* dst_y,
+void MultiplyRow_16_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
int scale,
int width) {
// clang-format off
@@ -2996,8 +3294,8 @@ void MultiplyRow_16_AVX2(const uint16* src_y,
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
-void Convert16To8Row_SSSE3(const uint16* src_y,
- uint8* dst_y,
+void Convert16To8Row_SSSE3(const uint16_t* src_y,
+ uint8_t* dst_y,
int scale,
int width) {
// clang-format off
@@ -3028,8 +3326,8 @@ void Convert16To8Row_SSSE3(const uint16* src_y,
}
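// [Editorial sketch, not part of this commit.] Consistent with the scale
// factors listed above (16384 maps a 10-bit 1023 to 255; 256 is a plain
// >> 8), Convert16To8 can be modelled as dst = min(255, (src * scale) >> 16);
// MultiplyRow_16 above apparently goes the other way, msb-aligning by plain
// multiplication. Scalar model (hypothetical name; assumes <stdint.h>):
void Convert16To8Row_sketch(const uint16_t* src_y, uint8_t* dst_y, int scale,
                            int width) {
  for (int x = 0; x < width; ++x) {
    uint32_t v = ((uint32_t)src_y[x] * (uint32_t)scale) >> 16;  // pmulhuw
    dst_y[x] = (uint8_t)(v > 255 ? 255 : v);  // saturate like packuswb
  }
}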
#ifdef HAS_CONVERT16TO8ROW_AVX2
-void Convert16To8Row_AVX2(const uint16* src_y,
- uint8* dst_y,
+void Convert16To8Row_AVX2(const uint16_t* src_y,
+ uint8_t* dst_y,
int scale,
int width) {
// clang-format off
@@ -3067,8 +3365,8 @@ void Convert16To8Row_AVX2(const uint16* src_y,
// 1024 = 10 bits
// 4096 = 12 bits
// TODO(fbarchard): reduce to SSE2
-void Convert8To16Row_SSE2(const uint8* src_y,
- uint16* dst_y,
+void Convert8To16Row_SSE2(const uint8_t* src_y,
+ uint16_t* dst_y,
int scale,
int width) {
// clang-format off
@@ -3101,8 +3399,8 @@ void Convert8To16Row_SSE2(const uint8* src_y,
}
#ifdef HAS_CONVERT8TO16ROW_AVX2
-void Convert8To16Row_AVX2(const uint8* src_y,
- uint16* dst_y,
+void Convert8To16Row_AVX2(const uint8_t* src_y,
+ uint16_t* dst_y,
int scale,
int width) {
// clang-format off
@@ -3169,66 +3467,65 @@ static const uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u,
128u, 128u, 128u, 128u, 0u, 3u,
6u, 9u, 12u, 15u};
-void SplitRGBRow_SSSE3(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "pshufb %5, %%xmm0 \n"
- "pshufb %6, %%xmm1 \n"
- "pshufb %7, %%xmm2 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
-
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "pshufb %8, %%xmm0 \n"
- "pshufb %9, %%xmm1 \n"
- "pshufb %10, %%xmm2 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
-
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "pshufb %11, %%xmm0 \n"
- "pshufb %12, %%xmm1 \n"
- "pshufb %13, %%xmm2 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(3) " \n"
- "lea " MEMLEA(0x10,3) ",%3 \n"
- "lea " MEMLEA(0x30,0) ",%0 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_rgb), // %0
- "+r"(dst_r), // %1
- "+r"(dst_g), // %2
- "+r"(dst_b), // %3
- "+r"(width) // %4
- : "m"(kShuffleMaskRGBToR0), // %5
- "m"(kShuffleMaskRGBToR1), // %6
- "m"(kShuffleMaskRGBToR2), // %7
- "m"(kShuffleMaskRGBToG0), // %8
- "m"(kShuffleMaskRGBToG1), // %9
- "m"(kShuffleMaskRGBToG2), // %10
- "m"(kShuffleMaskRGBToB0), // %11
- "m"(kShuffleMaskRGBToB1), // %12
- "m"(kShuffleMaskRGBToB2) // %13
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "pshufb %5, %%xmm0 \n"
+ "pshufb %6, %%xmm1 \n"
+ "pshufb %7, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "pshufb %8, %%xmm0 \n"
+ "pshufb %9, %%xmm1 \n"
+ "pshufb %10, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "pshufb %11, %%xmm0 \n"
+ "pshufb %12, %%xmm1 \n"
+ "pshufb %13, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%3) \n"
+ "lea 0x10(%3),%3 \n"
+ "lea 0x30(%0),%0 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_rgb), // %0
+ "+r"(dst_r), // %1
+ "+r"(dst_g), // %2
+ "+r"(dst_b), // %3
+ "+r"(width) // %4
+ : "m"(kShuffleMaskRGBToR0), // %5
+ "m"(kShuffleMaskRGBToR1), // %6
+ "m"(kShuffleMaskRGBToR2), // %7
+ "m"(kShuffleMaskRGBToG0), // %8
+ "m"(kShuffleMaskRGBToG1), // %9
+ "m"(kShuffleMaskRGBToG2), // %10
+ "m"(kShuffleMaskRGBToB0), // %11
+ "m"(kShuffleMaskRGBToB1), // %12
+ "m"(kShuffleMaskRGBToB2) // %13
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_SPLITRGBROW_SSSE3
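// [Editorial sketch, not part of this commit.] SplitRGB reads the same 48
// source bytes three times, each pass OR-combining three pshufb gathers
// (mask byte 128 zeroes a lane) to collect every third byte into one plane.
// Scalar model (hypothetical name; assumes <stdint.h>):
void SplitRGBRow_sketch(const uint8_t* src_rgb, uint8_t* dst_r,
                        uint8_t* dst_g, uint8_t* dst_b, int width) {
  for (int x = 0; x < width; ++x) {
    dst_r[x] = src_rgb[3 * x + 0];
    dst_g[x] = src_rgb[3 * x + 1];
    dst_b[x] = src_rgb[3 * x + 2];
  }
}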
@@ -3265,238 +3562,234 @@ static const uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u,
128u, 128u, 13u, 128u, 128u, 14u,
128u, 128u, 15u, 128u};
-void MergeRGBRow_SSSE3(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_SSSE3(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "pshufb %5, %%xmm0 \n"
- "pshufb %6, %%xmm1 \n"
- "pshufb %7, %%xmm2 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(3) " \n"
-
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "pshufb %8, %%xmm0 \n"
- "pshufb %9, %%xmm1 \n"
- "pshufb %10, %%xmm2 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS2(16, 3) " \n"
-
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "pshufb %11, %%xmm0 \n"
- "pshufb %12, %%xmm1 \n"
- "pshufb %13, %%xmm2 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS2(32, 3) " \n"
-
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "lea " MEMLEA(0x30,3) ",%3 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_r), // %0
- "+r"(src_g), // %1
- "+r"(src_b), // %2
- "+r"(dst_rgb), // %3
- "+r"(width) // %4
- : "m"(kShuffleMaskRToRGB0), // %5
- "m"(kShuffleMaskGToRGB0), // %6
- "m"(kShuffleMaskBToRGB0), // %7
- "m"(kShuffleMaskRToRGB1), // %8
- "m"(kShuffleMaskGToRGB1), // %9
- "m"(kShuffleMaskBToRGB1), // %10
- "m"(kShuffleMaskRToRGB2), // %11
- "m"(kShuffleMaskGToRGB2), // %12
- "m"(kShuffleMaskBToRGB2) // %13
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "pshufb %5, %%xmm0 \n"
+ "pshufb %6, %%xmm1 \n"
+ "pshufb %7, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%3) \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "pshufb %8, %%xmm0 \n"
+ "pshufb %9, %%xmm1 \n"
+ "pshufb %10, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,16(%3) \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "pshufb %11, %%xmm0 \n"
+ "pshufb %12, %%xmm1 \n"
+ "pshufb %13, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,32(%3) \n"
+
+ "lea 0x10(%0),%0 \n"
+ "lea 0x10(%1),%1 \n"
+ "lea 0x10(%2),%2 \n"
+ "lea 0x30(%3),%3 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_r), // %0
+ "+r"(src_g), // %1
+ "+r"(src_b), // %2
+ "+r"(dst_rgb), // %3
+ "+r"(width) // %4
+ : "m"(kShuffleMaskRToRGB0), // %5
+ "m"(kShuffleMaskGToRGB0), // %6
+ "m"(kShuffleMaskBToRGB0), // %7
+ "m"(kShuffleMaskRToRGB1), // %8
+ "m"(kShuffleMaskGToRGB1), // %9
+ "m"(kShuffleMaskBToRGB1), // %10
+ "m"(kShuffleMaskRToRGB2), // %11
+ "m"(kShuffleMaskGToRGB2), // %12
+ "m"(kShuffleMaskBToRGB2) // %13
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MERGERGBROW_SSSE3
#ifdef HAS_COPYROW_SSE2
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
- asm volatile (
- "test $0xf,%0 \n"
- "jne 2f \n"
- "test $0xf,%1 \n"
- "jne 2f \n"
+void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "test $0xf,%0 \n"
+ "jne 2f \n"
+ "test $0xf,%1 \n"
+ "jne 2f \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "jmp 9f \n"
+ LABELALIGN
+ "1: \n"
+ "movdqa (%0),%%xmm0 \n"
+ "movdqa 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm0,(%1) \n"
+ "movdqa %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "jmp 9f \n"
- LABELALIGN
- "2: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 2b \n"
- "9: \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
+ LABELALIGN
+ "2: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 2b \n"
+
+ LABELALIGN "9: \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_COPYROW_SSE2
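// [Editorial sketch, not part of this commit.] CopyRow_SSE2 tests the low
// four bits of both pointers and takes the movdqa loop only when src and dst
// are both 16-byte aligned, falling back to movdqu otherwise; each loop moves
// 32 bytes per iteration. The behaviour reduces to a plain copy (hypothetical
// name; assumes <stdint.h> and <string.h>):
void CopyRow_sketch(const uint8_t* src, uint8_t* dst, int width) {
  memcpy(dst, src, (size_t)width);  // memcpy handles alignment internally
}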
#ifdef HAS_COPYROW_AVX
-void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x40,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
+void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x40,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_COPYROW_AVX
#ifdef HAS_COPYROW_ERMS
// Multiple of 1.
-void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
+void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) {
size_t width_tmp = (size_t)(width);
- asm volatile("rep movsb " MEMMOVESTRING(0, 1) " \n"
- : "+S"(src), // %0
- "+D"(dst), // %1
- "+c"(width_tmp) // %2
- :
- : "memory", "cc");
+ asm volatile(
+
+ "rep movsb \n"
+ : "+S"(src), // %0
+ "+D"(dst), // %1
+ "+c"(width_tmp) // %2
+ :
+ : "memory", "cc");
}
#endif // HAS_COPYROW_ERMS
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
// width in pixels
-void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm0,%%xmm0 \n"
- "pslld $0x18,%%xmm0 \n"
- "pcmpeqb %%xmm1,%%xmm1 \n"
- "psrld $0x8,%%xmm1 \n"
+void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm0,%%xmm0 \n"
+ "pslld $0x18,%%xmm0 \n"
+ "pcmpeqb %%xmm1,%%xmm1 \n"
+ "psrld $0x8,%%xmm1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm4 \n"
- "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n"
- "pand %%xmm0,%%xmm2 \n"
- "pand %%xmm0,%%xmm3 \n"
- "pand %%xmm1,%%xmm4 \n"
- "pand %%xmm1,%%xmm5 \n"
- "por %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqu 0x10(%0),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqu (%1),%%xmm4 \n"
+ "movdqu 0x10(%1),%%xmm5 \n"
+ "pand %%xmm0,%%xmm2 \n"
+ "pand %%xmm0,%%xmm3 \n"
+ "pand %%xmm1,%%xmm4 \n"
+ "pand %%xmm1,%%xmm5 \n"
+ "por %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "movdqu %%xmm3,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBCOPYALPHAROW_SSE2
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
// width in pixels
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpsrld $0x8,%%ymm0,%%ymm0 \n"
+void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpsrld $0x8,%%ymm0,%%ymm0 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm2 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n"
- "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm1," MEMACCESS(1) " \n"
- "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm1 \n"
+ "vmovdqu 0x20(%0),%%ymm2 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n"
+ "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm1,(%1) \n"
+ "vmovdqu %%ymm2,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_ARGBCOPYALPHAROW_AVX2
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
// width in pixels
-void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ", %%xmm0 \n"
- "movdqu " MEMACCESS2(0x10, 0) ", %%xmm1 \n"
- "lea " MEMLEA(0x20, 0) ", %0 \n"
- "psrld $0x18, %%xmm0 \n"
- "psrld $0x18, %%xmm1 \n"
- "packssdw %%xmm1, %%xmm0 \n"
- "packuswb %%xmm0, %%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8, 1) ", %1 \n"
- "sub $0x8, %2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_a), // %1
- "+rm"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
+void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0), %%xmm0 \n"
+ "movdqu 0x10(%0), %%xmm1 \n"
+ "lea 0x20(%0), %0 \n"
+ "psrld $0x18, %%xmm0 \n"
+ "psrld $0x18, %%xmm1 \n"
+ "packssdw %%xmm1, %%xmm0 \n"
+ "packuswb %%xmm0, %%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1), %1 \n"
+ "sub $0x8, %2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
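// [Editorial sketch, not part of this commit.] ExtractAlpha shifts each ARGB
// dword right by 24 so only the alpha byte survives, then packs dwords down
// to bytes (packssdw + packuswb). Scalar model (hypothetical name; assumes
// <stdint.h>):
void ARGBExtractAlphaRow_sketch(const uint8_t* src_argb, uint8_t* dst_a,
                                int width) {
  for (int x = 0; x < width; ++x) {
    dst_a[x] = src_argb[4 * x + 3];  // alpha is byte 3 of little-endian ARGB
  }
}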
@@ -3505,569 +3798,549 @@ static const uvec8 kShuffleAlphaShort_AVX2 = {
3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u,
11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u};
-void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {
- asm volatile (
- "vmovdqa %3,%%ymm4 \n"
- "vbroadcastf128 %4,%%ymm5 \n"
+void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ asm volatile(
+ "vmovdqa %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ", %%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n"
- "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0
- "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40, 0) ", %%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60, 0) ", %%ymm3 \n"
- "lea " MEMLEA(0x80, 0) ", %0 \n"
- "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates
- "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
- "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
- "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates
- "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
- "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate.
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20, %2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_a), // %1
- "+rm"(width) // %2
- : "m"(kPermdARGBToY_AVX), // %3
- "m"(kShuffleAlphaShort_AVX2) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0), %%ymm0 \n"
+ "vmovdqu 0x20(%0), %%ymm1 \n"
+ "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0
+ "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
+ "vmovdqu 0x40(%0), %%ymm2 \n"
+ "vmovdqu 0x60(%0), %%ymm3 \n"
+ "lea 0x80(%0), %0 \n"
+ "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates
+ "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate.
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20, %2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+rm"(width) // %2
+ : "m"(kPermdARGBToY_AVX), // %3
+ "m"(kShuffleAlphaShort_AVX2) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm0,%%xmm0 \n"
- "pslld $0x18,%%xmm0 \n"
- "pcmpeqb %%xmm1,%%xmm1 \n"
- "psrld $0x8,%%xmm1 \n"
+void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm0,%%xmm0 \n"
+ "pslld $0x18,%%xmm0 \n"
+ "pcmpeqb %%xmm1,%%xmm1 \n"
+ "psrld $0x8,%%xmm1 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "punpckhwd %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm2,%%xmm2 \n"
- "movdqu " MEMACCESS(1) ",%%xmm4 \n"
- "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n"
- "pand %%xmm0,%%xmm2 \n"
- "pand %%xmm0,%%xmm3 \n"
- "pand %%xmm1,%%xmm4 \n"
- "pand %%xmm1,%%xmm5 \n"
- "por %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm2 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm2,%%xmm2 \n"
+ "punpckhwd %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm2,%%xmm2 \n"
+ "movdqu (%1),%%xmm4 \n"
+ "movdqu 0x10(%1),%%xmm5 \n"
+ "pand %%xmm0,%%xmm2 \n"
+ "pand %%xmm0,%%xmm3 \n"
+ "pand %%xmm1,%%xmm4 \n"
+ "pand %%xmm1,%%xmm5 \n"
+ "por %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "movdqu %%xmm3,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
// width in pixels
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpsrld $0x8,%%ymm0,%%ymm0 \n"
+void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpsrld $0x8,%%ymm0,%%ymm0 \n"
- LABELALIGN
- "1: \n"
- "vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n"
- "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "vpslld $0x18,%%ymm1,%%ymm1 \n"
- "vpslld $0x18,%%ymm2,%%ymm2 \n"
- "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n"
- "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm1," MEMACCESS(1) " \n"
- "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2"
- );
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbd (%0),%%ymm1 \n"
+ "vpmovzxbd 0x8(%0),%%ymm2 \n"
+ "lea 0x10(%0),%0 \n"
+ "vpslld $0x18,%%ymm1,%%ymm1 \n"
+ "vpslld $0x18,%%ymm2,%%ymm2 \n"
+ "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n"
+ "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm1,(%1) \n"
+ "vmovdqu %%ymm2,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
#ifdef HAS_SETROW_X86
-void SetRow_X86(uint8* dst, uint8 v8, int width) {
+void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
size_t width_tmp = (size_t)(width >> 2);
- const uint32 v32 = v8 * 0x01010101u; // Duplicate byte to all bytes.
- asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n"
- : "+D"(dst), // %0
- "+c"(width_tmp) // %1
- : "a"(v32) // %2
- : "memory", "cc");
+ const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes.
+ asm volatile(
+
+ "rep stosl \n"
+ : "+D"(dst), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v32) // %2
+ : "memory", "cc");
}
-void SetRow_ERMS(uint8* dst, uint8 v8, int width) {
+void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
size_t width_tmp = (size_t)(width);
- asm volatile("rep stosb " MEMSTORESTRING(al, 0) " \n"
- : "+D"(dst), // %0
- "+c"(width_tmp) // %1
- : "a"(v8) // %2
- : "memory", "cc");
+ asm volatile(
+
+ "rep stosb \n"
+ : "+D"(dst), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v8) // %2
+ : "memory", "cc");
}
-void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) {
+void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) {
size_t width_tmp = (size_t)(width);
- asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n"
- : "+D"(dst_argb), // %0
- "+c"(width_tmp) // %1
- : "a"(v32) // %2
- : "memory", "cc");
+ asm volatile(
+
+ "rep stosl \n"
+ : "+D"(dst_argb), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v32) // %2
+ : "memory", "cc");
}
#endif // HAS_SETROW_X86
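// [Editorial sketch, not part of this commit.] Multiplying the fill byte by
// 0x01010101 replicates it into all four byte lanes of a dword, e.g.
// 0xAB * 0x01010101 == 0xABABABAB, so rep stosl can store four bytes per
// count. The splat in isolation (hypothetical name; assumes <stdint.h>):
uint32_t SplatByte_sketch(uint8_t v8) {
  return v8 * 0x01010101u;  // copy v8 into each byte lane
}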
#ifdef HAS_YUY2TOYROW_SSE2
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
+void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
+void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- : "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_yuy2)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
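// [Editorial sketch, not part of this commit.] YUY2 packs pixels as
// Y0 U Y1 V; the UV kernel above pavgb-averages two source rows (one stride
// apart) before extracting the chroma bytes, which performs the vertical
// half of the 4:2:2 -> 4:2:0 subsample (pavgb rounds: (a + b + 1) >> 1).
// Scalar model (hypothetical name; assumes <stdint.h>):
void YUY2ToUVRow_sketch(const uint8_t* src_yuy2, int stride_yuy2,
                        uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* next = src_yuy2 + stride_yuy2;
  for (int x = 0; x < width; x += 2) {  // two pixels share one U and one V
    *dst_u++ = (uint8_t)((src_yuy2[1] + next[1] + 1) >> 1);
    *dst_v++ = (uint8_t)((src_yuy2[3] + next[3] + 1) >> 1);
    src_yuy2 += 4;
    next += 4;
  }
}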
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
+void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
}
-void UYVYToUVRow_SSE2(const uint8* src_uyvy,
+void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- : "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_uyvy)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_YUY2TOYROW_SSE2
#ifdef HAS_YUY2TOYROW_AVX2
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
+void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
- VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm1 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- : "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_yuy2)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm1 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
+void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
-void UYVYToUVRow_AVX2(const uint8* src_uyvy,
+void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
- VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm1 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- : "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_uyvy)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
- "sub %1,%2 \n"
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm1 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_YUY2TOYROW_AVX2
@@ -4077,85 +4350,84 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
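// [Editorial sketch, not part of this commit.] The blend kernel below takes
// the foreground alpha a from src_argb0, forces the result alpha opaque, and
// accumulates each colour channel as src0 + (src1 * (256 - a)) >> 8 with
// saturating byte adds (paddusb). One-pixel scalar model (hypothetical name;
// assumes <stdint.h>):
uint32_t ARGBBlendPixel_sketch(uint32_t fg, uint32_t bg) {
  uint32_t a = fg >> 24;       // foreground alpha
  uint32_t ia = 256 - a;       // pshufb'd inverse alpha, plus paddw of 1
  uint32_t out = 0xff000000u;  // result alpha forced to 255
  for (int shift = 0; shift < 24; shift += 8) {
    uint32_t c =
        ((fg >> shift) & 0xff) + ((((bg >> shift) & 0xff) * ia) >> 8);
    out |= (c > 255 ? 255 : c) << shift;  // saturate like paddusb
  }
  return out;
}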
// Blend 8 pixels at a time
-void ARGBBlendRow_SSSE3(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0xf,%%xmm7 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x8,%%xmm6 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psllw $0x8,%%xmm5 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "sub $0x4,%3 \n"
- "jl 49f \n"
-
- // 4 pixel loop.
- LABELALIGN
- "40: \n"
- "movdqu " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 99f \n"
-
- // 1 pixel loop.
- "91: \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "sub $0x1,%3 \n"
- "jge 91b \n"
- "99: \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : "m"(kShuffleAlpha) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ asm volatile(
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psrlw $0xf,%%xmm7 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrlw $0x8,%%xmm6 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psllw $0x8,%%xmm5 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "pslld $0x18,%%xmm4 \n"
+ "sub $0x4,%3 \n"
+ "jl 49f \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "40: \n"
+ "movdqu (%0),%%xmm3 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm0 \n"
+ "pxor %%xmm4,%%xmm3 \n"
+ "movdqu (%1),%%xmm2 \n"
+ "pshufb %4,%%xmm3 \n"
+ "pand %%xmm6,%%xmm2 \n"
+ "paddw %%xmm7,%%xmm3 \n"
+ "pmullw %%xmm3,%%xmm2 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "lea 0x10(%1),%1 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pmullw %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "paddusb %%xmm2,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%3 \n"
+ "jl 99f \n"
+
+ // 1 pixel loop.
+ "91: \n"
+ "movd (%0),%%xmm3 \n"
+ "lea 0x4(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm0 \n"
+ "pxor %%xmm4,%%xmm3 \n"
+ "movd (%1),%%xmm2 \n"
+ "pshufb %4,%%xmm3 \n"
+ "pand %%xmm6,%%xmm2 \n"
+ "paddw %%xmm7,%%xmm3 \n"
+ "pmullw %%xmm3,%%xmm2 \n"
+ "movd (%1),%%xmm1 \n"
+ "lea 0x4(%1),%1 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pmullw %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "paddusb %%xmm2,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "lea 0x4(%2),%2 \n"
+ "sub $0x1,%3 \n"
+ "jge 91b \n"
+ "99: \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ : "m"(kShuffleAlpha) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBBLENDROW_SSSE3
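The blend above is the premultiplied "over" operator, as read from the assembly: the inverted source alpha is biased to 256-a so one 8.8 fixed-point multiply and shift handles the background term, and paddusb both adds the foreground and clamps while the 0xff000000 mask forces the result opaque. A scalar sketch under that reading; the helper names are illustrative:

// dst = src0 (premultiplied) over src1, alpha forced opaque.
static uint8_t BlendChannel(uint8_t f, uint8_t b, uint8_t a) {
  int v = f + (((256 - a) * b) >> 8);
  return (uint8_t)(v > 255 ? 255 : v);  // paddusb saturates the same way
}

void BlendPixel(const uint8_t src0[4], const uint8_t src1[4], uint8_t dst[4]) {
  uint8_t a = src0[3];
  dst[0] = BlendChannel(src0[0], src1[0], a);  // B
  dst[1] = BlendChannel(src0[1], src1[1], a);  // G
  dst[2] = BlendChannel(src0[2], src1[2], a);  // R
  dst[3] = 255;                                // A
}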
@@ -4165,10 +4437,10 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0,
// =((A2*C2)+(B2*(255-C2))+255)/256
// signed version of math
// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
-void BlendPlaneRow_SSSE3(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+void BlendPlaneRow_SSSE3(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width) {
asm volatile(
"pcmpeqb %%xmm5,%%xmm5 \n"
@@ -4217,10 +4489,10 @@ void BlendPlaneRow_SSSE3(const uint8* src0,
// =((A2*C2)+(B2*(255-C2))+255)/256
// signed version of math
// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
-void BlendPlaneRow_AVX2(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+void BlendPlaneRow_AVX2(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width) {
asm volatile(
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
@@ -4279,45 +4551,45 @@ static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u,
static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u};
// Attenuate 4 pixels at a time.
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "pslld $0x18,%%xmm3 \n"
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
+void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "pslld $0x18,%%xmm3 \n"
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "punpcklbw %%xmm1,%%xmm1 \n"
- "pmulhuw %%xmm1,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "punpckhbw %%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "pand %%xmm3,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleAlpha0), // %3
- "m"(kShuffleAlpha1) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "punpcklbw %%xmm1,%%xmm1 \n"
+ "pmulhuw %%xmm1,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "punpckhbw %%xmm2,%%xmm2 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "lea 0x10(%0),%0 \n"
+ "pand %%xmm3,%%xmm2 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAlpha0), // %3
+ "m"(kShuffleAlpha1) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBATTENUATEROW_SSSE3
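Attenuation premultiplies each colour channel by the pixel's alpha. The kShuffleAlpha0/1 shuffles broadcast alpha as a*257 words and punpcklbw-against-self widens each channel to v*257, so pmulhuw plus the trailing psrlw $0x8 yields ((v*257)*(a*257)) >> 24, a close fixed-point stand-in for v*a/255. A scalar sketch using the exact divide for clarity (the SIMD result can differ by a unit of rounding):

// Premultiply colour by alpha; exact divide here, fixed point above.
void AttenuatePixel(const uint8_t src[4], uint8_t dst[4]) {
  uint8_t a = src[3];
  dst[0] = (uint8_t)((src[0] * a) / 255);  // B
  dst[1] = (uint8_t)((src[1] * a) / 255);  // G
  dst[2] = (uint8_t)((src[2] * a) / 255);  // R
  dst[3] = a;                              // alpha preserved
}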
@@ -4327,87 +4599,85 @@ static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u,
128u, 128u, 14u, 15u, 14u, 15u,
14u, 15u, 128u, 128u};
// Attenuate 8 pixels at a time.
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "vbroadcastf128 %3,%%ymm4 \n"
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpslld $0x18,%%ymm5,%%ymm5 \n"
- "sub %0,%1 \n"
+void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpslld $0x18,%%ymm5,%%ymm5 \n"
+ "sub %0,%1 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm6 \n"
- "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
- "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
- "vpshufb %%ymm4,%%ymm0,%%ymm2 \n"
- "vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpand %%ymm5,%%ymm6,%%ymm6 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpor %%ymm6,%%ymm0,%%ymm0 \n"
- MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleAlpha_AVX2) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm6 \n"
+ "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
+ "vpshufb %%ymm4,%%ymm0,%%ymm2 \n"
+ "vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpand %%ymm5,%%ymm6,%%ymm6 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm6,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%0,%1,1) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAlpha_AVX2) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ARGBATTENUATEROW_AVX2
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
// Unattenuate 4 pixels at a time.
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
uintptr_t alpha;
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movzb " MEMACCESS2(0x03,0) ",%3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x07,0) ",%3 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
- "pshuflw $0x40,%%xmm2,%%xmm2 \n"
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "movlhps %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "movzb " MEMACCESS2(0x0b,0) ",%3 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x0f,0) ",%3 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
- "pshuflw $0x40,%%xmm2,%%xmm2 \n"
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "movlhps %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width), // %2
- "=&r"(alpha) // %3
- : "r"(fixed_invtbl8) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movzb 0x03(%0),%3 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "movd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x07(%0),%3 \n"
+ "movd 0x00(%4,%3,4),%%xmm3 \n"
+ "pshuflw $0x40,%%xmm2,%%xmm2 \n"
+ "pshuflw $0x40,%%xmm3,%%xmm3 \n"
+ "movlhps %%xmm3,%%xmm2 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "movzb 0x0b(%0),%3 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "movd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x0f(%0),%3 \n"
+ "movd 0x00(%4,%3,4),%%xmm3 \n"
+ "pshuflw $0x40,%%xmm2,%%xmm2 \n"
+ "pshuflw $0x40,%%xmm3,%%xmm3 \n"
+ "movlhps %%xmm3,%%xmm2 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width), // %2
+ "=&r"(alpha) // %3
+ : "r"(fixed_invtbl8) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBUNATTENUATEROW_SSE2
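Unattenuation is the inverse: the SSE2 and AVX2 paths rescale colour channels by roughly 255/a using the fixed-point reciprocals in fixed_invtbl8 instead of a divide, and packuswb clamps any overshoot. A scalar sketch with the exact math; the a == 0 pass-through and the table encoding are assumptions, since fixed_invtbl8 is defined elsewhere:

// Undo premultiplication; exact divide here, fixed_invtbl8 above.
void UnattenuatePixel(const uint8_t src[4], uint8_t dst[4]) {
  uint8_t a = src[3];
  int c;
  for (c = 0; c < 3; ++c) {
    int v = a ? (src[c] * 255) / a : src[c];  // a == 0 handling assumed
    dst[c] = (uint8_t)(v > 255 ? 255 : v);    // packuswb clamp
  }
  dst[3] = a;
}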
@@ -4416,114 +4686,111 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
static const uvec8 kUnattenShuffleAlpha_AVX2 = {
0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u};
// Unattenuate 8 pixels at a time.
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
uintptr_t alpha;
- asm volatile (
- "sub %0,%1 \n"
- "vbroadcastf128 %5,%%ymm5 \n"
+ asm volatile(
+ "sub %0,%1 \n"
+ "vbroadcastf128 %5,%%ymm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- // replace VPGATHER
- "movzb " MEMACCESS2(0x03,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
- "movzb " MEMACCESS2(0x07,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
- "movzb " MEMACCESS2(0x0b,0) ",%3 \n"
- "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x0f,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
- "movzb " MEMACCESS2(0x13,0) ",%3 \n"
- "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
- "movzb " MEMACCESS2(0x17,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
- "movzb " MEMACCESS2(0x1b,0) ",%3 \n"
- "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x1f,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
- "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n"
- "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n"
- "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n"
- "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n"
- // end of VPGATHER
-
- "vmovdqu " MEMACCESS(0) ",%%ymm6 \n"
- "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
- "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
- "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n"
- "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n"
- "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
- "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width), // %2
- "=&r"(alpha) // %3
- : "r"(fixed_invtbl8), // %4
- "m"(kUnattenShuffleAlpha_AVX2) // %5
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ // replace VPGATHER
+ "movzb 0x03(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm0 \n"
+ "movzb 0x07(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm1 \n"
+ "movzb 0x0b(%0),%3 \n"
+ "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x0f(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm3 \n"
+ "movzb 0x13(%0),%3 \n"
+ "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm0 \n"
+ "movzb 0x17(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm1 \n"
+ "movzb 0x1b(%0),%3 \n"
+ "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x1f(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm3 \n"
+ "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n"
+ "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n"
+ "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n"
+ "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n"
+ // end of VPGATHER
+
+ "vmovdqu (%0),%%ymm6 \n"
+ "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
+ "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n"
+ "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n"
+ "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%0,%1,1) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width), // %2
+ "=&r"(alpha) // %3
+ : "r"(fixed_invtbl8), // %4
+ "m"(kUnattenShuffleAlpha_AVX2) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBUNATTENUATEROW_AVX2
#ifdef HAS_ARGBGRAYROW_SSSE3
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
+void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm5,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrld $0x18,%%xmm2 \n"
- "psrld $0x18,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm3 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm3,%%xmm0 \n"
- "punpckhwd %%xmm3,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqu 0x10(%0),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrld $0x18,%%xmm2 \n"
+ "psrld $0x18,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpcklbw %%xmm2,%%xmm3 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm3,%%xmm0 \n"
+ "punpckhwd %%xmm3,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToYJ), // %3
+ "m"(kAddYJ64) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBGRAYROW_SSSE3
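The gray path computes one full-range luma value per pixel and fans it out to B, G and R, keeping alpha. The rounding constant (+64 via kAddYJ64) and the >>7 are visible above; the 15/75/38 B/G/R weights, which sum to 128, are assumed here from this file's kARGBToYJ table, whose definition sits earlier in the file. A scalar sketch:

// Full-range luma fan-out; weights assumed from kARGBToYJ.
void GrayPixel(const uint8_t src[4], uint8_t dst[4]) {
  int y = (15 * src[0] + 75 * src[1] + 38 * src[2] + 64) >> 7;
  dst[0] = dst[1] = dst[2] = (uint8_t)y;  // B = G = R = Y
  dst[3] = src[3];                        // alpha preserved
}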
@@ -4542,418 +4809,405 @@ static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0,
24, 98, 50, 0, 24, 98, 50, 0};
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
- asm volatile (
- "movdqa %2,%%xmm2 \n"
- "movdqa %3,%%xmm3 \n"
- "movdqa %4,%%xmm4 \n"
+void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movdqa %2,%%xmm2 \n"
+ "movdqa %3,%%xmm3 \n"
+ "movdqa %4,%%xmm4 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "pmaddubsw %%xmm2,%%xmm6 \n"
- "phaddw %%xmm6,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm5 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm5 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm5 \n"
- "psrlw $0x7,%%xmm5 \n"
- "packuswb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm5 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm5 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm5 \n"
- "psrlw $0x7,%%xmm5 \n"
- "packuswb %%xmm5,%%xmm5 \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "psrld $0x18,%%xmm6 \n"
- "psrld $0x18,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm5 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm5,%%xmm0 \n"
- "punpckhwd %%xmm5,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(0) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%1 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- : "m"(kARGBToSepiaB), // %2
- "m"(kARGBToSepiaG), // %3
- "m"(kARGBToSepiaR) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm6 \n"
+ "pmaddubsw %%xmm2,%%xmm0 \n"
+ "pmaddubsw %%xmm2,%%xmm6 \n"
+ "phaddw %%xmm6,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movdqu (%0),%%xmm5 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm5 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm5 \n"
+ "psrlw $0x7,%%xmm5 \n"
+ "packuswb %%xmm5,%%xmm5 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "movdqu (%0),%%xmm5 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm5 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm5 \n"
+ "psrlw $0x7,%%xmm5 \n"
+ "packuswb %%xmm5,%%xmm5 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "psrld $0x18,%%xmm6 \n"
+ "psrld $0x18,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm5 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm5,%%xmm0 \n"
+ "punpckhwd %%xmm5,%%xmm1 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "movdqu %%xmm1,0x10(%0) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%1 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "m"(kARGBToSepiaB), // %2
+ "m"(kARGBToSepiaG), // %3
+ "m"(kARGBToSepiaR) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ARGBSEPIAROW_SSSE3
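Sepia is three weighted sums per pixel, one per output channel, each a pmaddubsw dot product against a 7-bit fixed-point row followed by the psrlw $0x7 above. Only kARGBToSepiaR's weights ({24, 98, 50} applied to B, G, R) are visible here; the B and G outputs use kARGBToSepiaB and kARGBToSepiaG the same way, and alpha is carried through. A scalar sketch of one channel:

// One sepia channel: a 7-bit fixed-point dot product over B,G,R.
static uint8_t SepiaChannel(const uint8_t p[4], int wb, int wg, int wr) {
  int v = (wb * p[0] + wg * p[1] + wr * p[2]) >> 7;
  return (uint8_t)(v > 255 ? 255 : v);  // packuswb clamp
}
// e.g. the red output uses kARGBToSepiaR's weights:
//   dst_r = SepiaChannel(p, 24, 98, 50);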
#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
// Transform 8 ARGB pixels (32 bytes) with color matrix.
// Same as Sepia except matrix is provided.
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width) {
- asm volatile (
- "movdqu " MEMACCESS(3) ",%%xmm5 \n"
- "pshufd $0x00,%%xmm5,%%xmm2 \n"
- "pshufd $0x55,%%xmm5,%%xmm3 \n"
- "pshufd $0xaa,%%xmm5,%%xmm4 \n"
- "pshufd $0xff,%%xmm5,%%xmm5 \n"
+ asm volatile(
+ "movdqu (%3),%%xmm5 \n"
+ "pshufd $0x00,%%xmm5,%%xmm2 \n"
+ "pshufd $0x55,%%xmm5,%%xmm3 \n"
+ "pshufd $0xaa,%%xmm5,%%xmm4 \n"
+ "pshufd $0xff,%%xmm5,%%xmm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "pmaddubsw %%xmm2,%%xmm7 \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "phaddsw %%xmm7,%%xmm0 \n"
- "phaddsw %%xmm1,%%xmm6 \n"
- "psraw $0x6,%%xmm0 \n"
- "psraw $0x6,%%xmm6 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm7 \n"
- "phaddsw %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm7 \n"
- "phaddsw %%xmm7,%%xmm6 \n"
- "psraw $0x6,%%xmm1 \n"
- "psraw $0x6,%%xmm6 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "punpcklwd %%xmm1,%%xmm0 \n"
- "punpckhwd %%xmm1,%%xmm6 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(matrix_argb) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm7 \n"
+ "pmaddubsw %%xmm2,%%xmm0 \n"
+ "pmaddubsw %%xmm2,%%xmm7 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "phaddsw %%xmm7,%%xmm0 \n"
+ "phaddsw %%xmm1,%%xmm6 \n"
+ "psraw $0x6,%%xmm0 \n"
+ "psraw $0x6,%%xmm6 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "movdqu 0x10(%0),%%xmm7 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm7 \n"
+ "phaddsw %%xmm7,%%xmm1 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x10(%0),%%xmm7 \n"
+ "pmaddubsw %%xmm5,%%xmm6 \n"
+ "pmaddubsw %%xmm5,%%xmm7 \n"
+ "phaddsw %%xmm7,%%xmm6 \n"
+ "psraw $0x6,%%xmm1 \n"
+ "psraw $0x6,%%xmm6 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "punpcklwd %%xmm1,%%xmm0 \n"
+ "punpckhwd %%xmm1,%%xmm6 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm6,0x10(%1) \n"
+ "lea 0x20(%0),%0 \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(matrix_argb) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
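The color-matrix kernel generalizes Sepia: matrix_argb supplies four int8 rows with a 6-bit fraction, one row per output channel, broadcast by the pshufd immediates and applied with the pmaddubsw/phaddsw/psraw $6 sequence. A scalar sketch, assuming the rows land on outputs in B, G, R, A order as in the Sepia case:

// One pixel through the 4x4 int8 matrix; >>6 mirrors psraw $6
// (arithmetic shift on signed sums).
void ColorMatrixPixel(const uint8_t src[4], uint8_t dst[4],
                      const int8_t matrix_argb[16]) {
  int c;
  for (c = 0; c < 4; ++c) {
    const int8_t* m = matrix_argb + c * 4;  // one row per output channel
    int v =
        (m[0] * src[0] + m[1] * src[1] + m[2] * src[2] + m[3] * src[3]) >> 6;
    dst[c] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
  }
}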
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
-void ARGBQuantizeRow_SSE2(uint8* dst_argb,
+void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
int width) {
- asm volatile (
- "movd %2,%%xmm2 \n"
- "movd %3,%%xmm3 \n"
- "movd %4,%%xmm4 \n"
- "pshuflw $0x40,%%xmm2,%%xmm2 \n"
- "pshufd $0x44,%%xmm2,%%xmm2 \n"
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "pshufd $0x44,%%xmm3,%%xmm3 \n"
- "pshuflw $0x40,%%xmm4,%%xmm4 \n"
- "pshufd $0x44,%%xmm4,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "pslld $0x18,%%xmm6 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "pmullw %%xmm3,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm7 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "pand %%xmm6,%%xmm7 \n"
- "paddw %%xmm4,%%xmm0 \n"
- "paddw %%xmm4,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "por %%xmm7,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "sub $0x4,%1 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- : "r"(scale), // %2
- "r"(interval_size), // %3
- "r"(interval_offset) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ asm volatile(
+ "movd %2,%%xmm2 \n"
+ "movd %3,%%xmm3 \n"
+ "movd %4,%%xmm4 \n"
+ "pshuflw $0x40,%%xmm2,%%xmm2 \n"
+ "pshufd $0x44,%%xmm2,%%xmm2 \n"
+ "pshuflw $0x40,%%xmm3,%%xmm3 \n"
+ "pshufd $0x44,%%xmm3,%%xmm3 \n"
+ "pshuflw $0x40,%%xmm4,%%xmm4 \n"
+ "pshufd $0x44,%%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "pslld $0x18,%%xmm6 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "punpckhbw %%xmm5,%%xmm1 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "pmullw %%xmm3,%%xmm0 \n"
+ "movdqu (%0),%%xmm7 \n"
+ "pmullw %%xmm3,%%xmm1 \n"
+ "pand %%xmm6,%%xmm7 \n"
+ "paddw %%xmm4,%%xmm0 \n"
+ "paddw %%xmm4,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "por %%xmm7,%%xmm0 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x4,%1 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "r"(scale), // %2
+ "r"(interval_size), // %3
+ "r"(interval_offset) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBQUANTIZEROW_SSE2
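The quantizer maps each colour channel onto a coarse ladder: scale is 16.16 fixed point, so (v * scale) >> 16 (the pmulhuw) picks a bucket, which pmullw/paddw then map to interval_size * bucket + interval_offset; alpha is masked out and preserved. A scalar sketch:

// In-place quantization of one pixel; alpha (p[3]) is untouched.
void QuantizePixel(uint8_t p[4], int scale, int interval_size,
                   int interval_offset) {
  int c;
  for (c = 0; c < 3; ++c) {
    p[c] = (uint8_t)(((p[c] * scale) >> 16) * interval_size +
                     interval_offset);
  }
}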
#ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value.
-void ARGBShadeRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBShadeRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value) {
- asm volatile (
- "movd %3,%%xmm2 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "punpcklqdq %%xmm2,%%xmm2 \n"
+ uint32_t value) {
+ asm volatile(
+ "movd %3,%%xmm2 \n"
+ "punpcklbw %%xmm2,%%xmm2 \n"
+ "punpcklqdq %%xmm2,%%xmm2 \n"
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(value) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2"
- );
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(value) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_ARGBSHADEROW_SSE2
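Shading scales all four channels by the matching bytes of 'value'. Both operands are widened to v*257 words by punpcklbw-against-self, so pmulhuw plus psrlw $0x8 computes ((v*257)*(s*257)) >> 24, approximately v*s/255. A scalar sketch of that fixed-point form:

// All four channels scaled by the matching byte of 'value'.
void ShadePixel(const uint8_t src[4], uint8_t dst[4], uint32_t value) {
  int c;
  for (c = 0; c < 4; ++c) {
    uint32_t s = (value >> (c * 8)) & 0xff;
    dst[c] = (uint8_t)(((src[c] * 257u) * (s * 257u)) >> 24);
  }
}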
#ifdef HAS_ARGBMULTIPLYROW_SSE2
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-void ARGBMultiplyRow_SSE2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
+ asm volatile(
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqu %%xmm0,%%xmm1 \n"
- "movdqu %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpckhbw %%xmm5,%%xmm3 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "pmulhuw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ "pxor %%xmm5,%%xmm5 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm2 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqu %%xmm0,%%xmm1 \n"
+ "movdqu %%xmm2,%%xmm3 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpckhbw %%xmm5,%%xmm3 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "pmulhuw %%xmm3,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_ARGBMULTIPLYROW_SSE2
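The multiply blend (this function and the AVX2 version below) is a per-channel product renormalized to byte range, roughly dst = src0 * src1 / 255; the SIMD paths approximate it as ((v*257) * w) >> 16 by widening one operand against itself and the other against zero. A scalar sketch of the same approximation:

// Per-channel product: ((v * 257) * w) >> 16 ~= v * w / 255.
void MultiplyPixel(const uint8_t s0[4], const uint8_t s1[4], uint8_t dst[4]) {
  int c;
  for (c = 0; c < 4; ++c) {
    dst[c] = (uint8_t)((s0[c] * 257u * s1[c]) >> 16);
  }
}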
#ifdef HAS_ARGBMULTIPLYROW_AVX2
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBMultiplyRow_AVX2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+ asm volatile(
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm3 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n"
- "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n"
- "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
- "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+      // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "vmovdqu (%1),%%ymm3 \n"
+ "lea 0x20(%1),%1 \n"
+ "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n"
+ "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
+ "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc"
#if defined(__AVX2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+ ,
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
- );
+ );
}
#endif // HAS_ARGBMULTIPLYROW_AVX2
#ifdef HAS_ARGBADDROW_SSE2
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
-void ARGBAddRow_SSE2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "lea 0x10(%1),%1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_ARGBADDROW_SSE2
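The add and subtract blends (this function and the three that follow) are plain per-byte saturating arithmetic, exactly what paddusb and psubusb compute. A scalar sketch covering both; the names are illustrative:

void AddSubPixels(const uint8_t s0[4], const uint8_t s1[4],
                  uint8_t add_dst[4], uint8_t sub_dst[4]) {
  int c;
  for (c = 0; c < 4; ++c) {
    int sum = s0[c] + s1[c];
    int dif = s0[c] - s1[c];
    add_dst[c] = (uint8_t)(sum > 255 ? 255 : sum);  // paddusb
    sub_dst[c] = (uint8_t)(dif < 0 ? 0 : dif);      // psubusb
  }
}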
#ifdef HAS_ARGBADDROW_AVX2
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBAddRow_AVX2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vpaddusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "vmovdqu %%ymm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
- , "xmm0"
- );
+ asm volatile(
+      // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpaddusb (%1),%%ymm0,%%ymm0 \n"
+ "lea 0x20(%1),%1 \n"
+ "vmovdqu %%ymm0,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0");
}
#endif // HAS_ARGBADDROW_AVX2
#ifdef HAS_ARGBSUBTRACTROW_SSE2
// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
-void ARGBSubtractRow_SSE2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psubusb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "lea 0x10(%1),%1 \n"
+ "psubusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_ARGBSUBTRACTROW_SSE2
#ifdef HAS_ARGBSUBTRACTROW_AVX2
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-void ARGBSubtractRow_AVX2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vpsubusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "vmovdqu %%ymm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
- , "xmm0"
- );
+ asm volatile(
+      // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpsubusb (%1),%%ymm0,%%ymm0 \n"
+ "lea 0x20(%1),%1 \n"
+ "vmovdqu %%ymm0,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0");
}
#endif // HAS_ARGBSUBTRACTROW_AVX2
@@ -4962,55 +5216,53 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0,
// -1 0 1
// -2 0 2
// -1 0 1
-void SobelXRow_SSE2(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width) {
- asm volatile (
- "sub %0,%1 \n"
- "sub %0,%2 \n"
- "sub %0,%3 \n"
- "pxor %%xmm5,%%xmm5 \n"
+ asm volatile(
+ "sub %0,%1 \n"
+ "sub %0,%2 \n"
+ "sub %0,%3 \n"
+ "pxor %%xmm5,%%xmm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "movq " MEMACCESS2(0x2,0) ",%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "psubw %%xmm1,%%xmm0 \n"
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
- MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "psubw %%xmm2,%%xmm1 \n"
- MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
- MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "psubw %%xmm3,%%xmm2 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "pxor %%xmm1,%%xmm1 \n"
- "psubw %%xmm0,%%xmm1 \n"
- "pmaxsw %%xmm1,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1)
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "sub $0x8,%4 \n"
- "jg 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(src_y2), // %2
- "+r"(dst_sobelx), // %3
- "+r"(width) // %4
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movq 0x2(%0),%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "psubw %%xmm1,%%xmm0 \n"
+ "movq 0x00(%0,%1,1),%%xmm1 \n"
+ "movq 0x02(%0,%1,1),%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "psubw %%xmm2,%%xmm1 \n"
+ "movq 0x00(%0,%2,1),%%xmm2 \n"
+ "movq 0x02(%0,%2,1),%%xmm3 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm3 \n"
+ "psubw %%xmm3,%%xmm2 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "psubw %%xmm0,%%xmm1 \n"
+ "pmaxsw %%xmm1,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,0x00(%0,%3,1) \n"
+ "lea 0x8(%0),%0 \n"
+ "sub $0x8,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(src_y2), // %2
+ "+r"(dst_sobelx), // %3
+ "+r"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_SOBELXROW_SSE2
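A scalar reading of the kernel above: each of the three input rows contributes left-minus-right at a two-column spacing, the middle row twice, and the pxor/psubw/pmaxsw trio is a branch-free absolute value before the saturating pack. SobelYRow_SSE2 below is the same computation with the kernel transposed (two rows, column offsets 0, 1, 2). A sketch for one output byte; the helper name is illustrative:

static uint8_t SobelXPixel(const uint8_t* y0, const uint8_t* y1,
                           const uint8_t* y2, int i) {
  int sobel = (y0[i] - y0[i + 2]) + 2 * (y1[i] - y1[i + 2]) +
              (y2[i] - y2[i + 2]);
  if (sobel < 0) sobel = -sobel;                // pmaxsw(x, -x)
  return (uint8_t)(sobel > 255 ? 255 : sobel);  // packuswb clamp
}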
@@ -5019,52 +5271,50 @@ void SobelXRow_SSE2(const uint8* src_y0,
// -1 -2 -1
// 0 0 0
// 1 2 1
-void SobelYRow_SSE2(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width) {
- asm volatile (
- "sub %0,%1 \n"
- "sub %0,%2 \n"
- "pxor %%xmm5,%%xmm5 \n"
+ asm volatile(
+ "sub %0,%1 \n"
+ "sub %0,%2 \n"
+ "pxor %%xmm5,%%xmm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "psubw %%xmm1,%%xmm0 \n"
- "movq " MEMACCESS2(0x1,0) ",%%xmm1 \n"
- MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "psubw %%xmm2,%%xmm1 \n"
- "movq " MEMACCESS2(0x2,0) ",%%xmm2 \n"
- MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "psubw %%xmm3,%%xmm2 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "pxor %%xmm1,%%xmm1 \n"
- "psubw %%xmm0,%%xmm1 \n"
- "pmaxsw %%xmm1,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1)
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(dst_sobely), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movq 0x00(%0,%1,1),%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "psubw %%xmm1,%%xmm0 \n"
+ "movq 0x1(%0),%%xmm1 \n"
+ "movq 0x01(%0,%1,1),%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "psubw %%xmm2,%%xmm1 \n"
+ "movq 0x2(%0),%%xmm2 \n"
+ "movq 0x02(%0,%1,1),%%xmm3 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm3 \n"
+ "psubw %%xmm3,%%xmm2 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "psubw %%xmm0,%%xmm1 \n"
+ "pmaxsw %%xmm1,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,0x00(%0,%2,1) \n"
+ "lea 0x8(%0),%0 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(dst_sobely), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_SOBELYROW_SSE2
@@ -5074,83 +5324,79 @@ void SobelYRow_SSE2(const uint8* src_y0,
// R = Sobel
// G = Sobel
// B = Sobel
-void SobelRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- "sub %0,%1 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
+ asm volatile(
+ "sub %0,%1 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0x18,%%xmm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm2 \n"
- "punpckhbw %%xmm0,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm1 \n"
- "punpckhwd %%xmm2,%%xmm2 \n"
- "por %%xmm5,%%xmm1 \n"
- "por %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklwd %%xmm0,%%xmm3 \n"
- "punpckhwd %%xmm0,%%xmm0 \n"
- "por %%xmm5,%%xmm3 \n"
- "por %%xmm5,%%xmm0 \n"
- "movdqu %%xmm1," MEMACCESS(2) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x20,2) " \n"
- "movdqu %%xmm0," MEMACCESS2(0x30,2) " \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+      // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "punpcklbw %%xmm0,%%xmm2 \n"
+ "punpckhbw %%xmm0,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "punpcklwd %%xmm2,%%xmm1 \n"
+ "punpckhwd %%xmm2,%%xmm2 \n"
+ "por %%xmm5,%%xmm1 \n"
+ "por %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "punpcklwd %%xmm0,%%xmm3 \n"
+ "punpckhwd %%xmm0,%%xmm0 \n"
+ "por %%xmm5,%%xmm3 \n"
+ "por %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm1,(%2) \n"
+ "movdqu %%xmm2,0x10(%2) \n"
+ "movdqu %%xmm3,0x20(%2) \n"
+ "movdqu %%xmm0,0x30(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_SOBELROW_SSE2
#ifdef HAS_SOBELTOPLANEROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into a plane.
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width) {
- asm volatile (
- "sub %0,%1 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
+ asm volatile(
+ "sub %0,%1 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0x18,%%xmm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_y), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1"
- );
+      // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_y), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_SOBELTOPLANEROW_SSE2
@@ -5160,1054 +5406,1123 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
// R = Sobel X
// G = Sobel
// B = Sobel Y
-void SobelXYRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
- asm volatile (
- "sub %0,%1 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
+ asm volatile(
+ "sub %0,%1 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "paddusb %%xmm1,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "punpckhbw %%xmm5,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "punpcklbw %%xmm2,%%xmm4 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "punpcklwd %%xmm3,%%xmm6 \n"
- "punpckhwd %%xmm3,%%xmm4 \n"
- "movdqa %%xmm1,%%xmm7 \n"
- "punpcklwd %%xmm0,%%xmm7 \n"
- "punpckhwd %%xmm0,%%xmm1 \n"
- "movdqu %%xmm6," MEMACCESS(2) " \n"
- "movdqu %%xmm4," MEMACCESS2(0x10,2) " \n"
- "movdqu %%xmm7," MEMACCESS2(0x20,2) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x30,2) " \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+      // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "paddusb %%xmm1,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "punpcklbw %%xmm5,%%xmm3 \n"
+ "punpckhbw %%xmm5,%%xmm0 \n"
+ "movdqa %%xmm1,%%xmm4 \n"
+ "punpcklbw %%xmm2,%%xmm4 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "punpcklwd %%xmm3,%%xmm6 \n"
+ "punpckhwd %%xmm3,%%xmm4 \n"
+ "movdqa %%xmm1,%%xmm7 \n"
+ "punpcklwd %%xmm0,%%xmm7 \n"
+ "punpckhwd %%xmm0,%%xmm1 \n"
+ "movdqu %%xmm6,(%2) \n"
+ "movdqu %%xmm4,0x10(%2) \n"
+ "movdqu %%xmm7,0x20(%2) \n"
+ "movdqu %%xmm1,0x30(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_SOBELXYROW_SSE2
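The three Sobel writers above differ only in how they pack the per-pixel Sobel X value, Sobel Y value and their saturating sum s: SobelRow stores {s, s, s, 255}, SobelToPlaneRow stores the bare s, and SobelXYRow stores {sobely, s, sobelx, 255} in B, G, R, A memory order. A scalar sketch of the XY packing:

void SobelXYPixel(uint8_t sobelx, uint8_t sobely, uint8_t dst[4]) {
  int s = sobelx + sobely;
  dst[0] = sobely;                        // B = Sobel Y
  dst[1] = (uint8_t)(s > 255 ? 255 : s);  // G = combined Sobel (paddusb)
  dst[2] = sobelx;                        // R = Sobel X
  dst[3] = 255;                           // opaque alpha
}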
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
// Creates a table of cumulative sums where each value is a sum of all values
// above and to the left of the value, inclusive of the value.
-void ComputeCumulativeSumRow_SSE2(const uint8* row,
- int32* cumsum,
- const int32* previous_cumsum,
+void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
int width) {
- asm volatile (
- "pxor %%xmm0,%%xmm0 \n"
- "pxor %%xmm1,%%xmm1 \n"
- "sub $0x4,%3 \n"
- "jl 49f \n"
- "test $0xf,%1 \n"
- "jne 49f \n"
-
- // 4 pixel loop.
- LABELALIGN
- "40: \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm2,%%xmm4 \n"
- "punpcklbw %%xmm1,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm1,%%xmm2 \n"
- "punpckhwd %%xmm1,%%xmm3 \n"
- "punpckhbw %%xmm1,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "punpcklwd %%xmm1,%%xmm4 \n"
- "punpckhwd %%xmm1,%%xmm5 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,2) ",%%xmm3 \n"
- "paddd %%xmm0,%%xmm3 \n"
- "paddd %%xmm4,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x20,2) ",%%xmm4 \n"
- "paddd %%xmm0,%%xmm4 \n"
- "paddd %%xmm5,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x30,2) ",%%xmm5 \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "paddd %%xmm0,%%xmm5 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm4," MEMACCESS2(0x20,1) " \n"
- "movdqu %%xmm5," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 19f \n"
-
- // 1 pixel loop.
- LABELALIGN
- "10: \n"
- "movd " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "punpcklbw %%xmm1,%%xmm2 \n"
- "punpcklwd %%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x1,%3 \n"
- "jge 10b \n"
-
- "19: \n"
- : "+r"(row), // %0
- "+r"(cumsum), // %1
- "+r"(previous_cumsum), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ asm volatile(
+ "pxor %%xmm0,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "sub $0x4,%3 \n"
+ "jl 49f \n"
+ "test $0xf,%1 \n"
+ "jne 49f \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "40: \n"
+ "movdqu (%0),%%xmm2 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm2,%%xmm4 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm1,%%xmm2 \n"
+ "punpckhwd %%xmm1,%%xmm3 \n"
+ "punpckhbw %%xmm1,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "punpcklwd %%xmm1,%%xmm4 \n"
+ "punpckhwd %%xmm1,%%xmm5 \n"
+ "paddd %%xmm2,%%xmm0 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "paddd %%xmm3,%%xmm0 \n"
+ "movdqu 0x10(%2),%%xmm3 \n"
+ "paddd %%xmm0,%%xmm3 \n"
+ "paddd %%xmm4,%%xmm0 \n"
+ "movdqu 0x20(%2),%%xmm4 \n"
+ "paddd %%xmm0,%%xmm4 \n"
+ "paddd %%xmm5,%%xmm0 \n"
+ "movdqu 0x30(%2),%%xmm5 \n"
+ "lea 0x40(%2),%2 \n"
+ "paddd %%xmm0,%%xmm5 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "movdqu %%xmm3,0x10(%1) \n"
+ "movdqu %%xmm4,0x20(%1) \n"
+ "movdqu %%xmm5,0x30(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x4,%3 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%3 \n"
+ "jl 19f \n"
+
+ // 1 pixel loop.
+ LABELALIGN
+ "10: \n"
+ "movd (%0),%%xmm2 \n"
+ "lea 0x4(%0),%0 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "punpcklwd %%xmm1,%%xmm2 \n"
+ "paddd %%xmm2,%%xmm0 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "lea 0x10(%2),%2 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x1,%3 \n"
+ "jge 10b \n"
+
+ "19: \n"
+ : "+r"(row), // %0
+ "+r"(cumsum), // %1
+ "+r"(previous_cumsum), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
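In integral-image terms: the function keeps a running sum across the current row per ARGB channel and adds the row above's cumulative sums, which is exactly the xmm0 accumulator plus the previous_cumsum loads above. A scalar sketch:

// One row of the integral image; 4 lanes per pixel (B,G,R,A).
void CumulativeSumRowScalar(const uint8_t* row, int32_t* cumsum,
                            const int32_t* previous_cumsum, int width) {
  int32_t sum[4] = {0, 0, 0, 0};
  int x, c;
  for (x = 0; x < width; ++x) {
    for (c = 0; c < 4; ++c) {
      sum[c] += row[x * 4 + c];  // running sum across this row
      cumsum[x * 4 + c] = sum[c] + previous_cumsum[x * 4 + c];
    }
  }
}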
#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-void CumulativeSumToAverageRow_SSE2(const int32* topleft,
- const int32* botleft,
+void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
+ const int32_t* botleft,
int width,
int area,
- uint8* dst,
+ uint8_t* dst,
int count) {
- asm volatile (
- "movd %5,%%xmm5 \n"
- "cvtdq2ps %%xmm5,%%xmm5 \n"
- "rcpss %%xmm5,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n"
- "sub $0x4,%3 \n"
- "jl 49f \n"
- "cmpl $0x80,%5 \n"
- "ja 40f \n"
-
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrld $0x10,%%xmm6 \n"
- "cvtdq2ps %%xmm6,%%xmm6 \n"
- "addps %%xmm6,%%xmm5 \n"
- "mulps %%xmm4,%%xmm5 \n"
- "cvtps2dq %%xmm5,%%xmm5 \n"
- "packssdw %%xmm5,%%xmm5 \n"
-
- // 4 pixel small loop.
- LABELALIGN
- "4: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
- MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
- MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
- "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
- "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
- MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
- MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packssdw %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm5,%%xmm0 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 4b \n"
- "jmp 49f \n"
-
- // 4 pixel loop \n"
- LABELALIGN
- "40: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
- MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
- MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
- "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
- "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
- MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
- MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "cvtdq2ps %%xmm1,%%xmm1 \n"
- "mulps %%xmm4,%%xmm0 \n"
- "mulps %%xmm4,%%xmm1 \n"
- "cvtdq2ps %%xmm2,%%xmm2 \n"
- "cvtdq2ps %%xmm3,%%xmm3 \n"
- "mulps %%xmm4,%%xmm2 \n"
- "mulps %%xmm4,%%xmm3 \n"
- "cvtps2dq %%xmm0,%%xmm0 \n"
- "cvtps2dq %%xmm1,%%xmm1 \n"
- "cvtps2dq %%xmm2,%%xmm2 \n"
- "cvtps2dq %%xmm3,%%xmm3 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packssdw %%xmm3,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 19f \n"
-
- // 1 pixel loop \n"
- LABELALIGN
- "10: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "mulps %%xmm4,%%xmm0 \n"
- "cvtps2dq %%xmm0,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "sub $0x1,%3 \n"
- "jge 10b \n"
- "19: \n"
- : "+r"(topleft), // %0
- "+r"(botleft), // %1
- "+r"(dst), // %2
- "+rm"(count) // %3
- : "r"((intptr_t)(width)), // %4
- "rm"(area) // %5
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ asm volatile(
+ "movd %5,%%xmm5 \n"
+ "cvtdq2ps %%xmm5,%%xmm5 \n"
+ "rcpss %%xmm5,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "sub $0x4,%3 \n"
+ "jl 49f \n"
+ "cmpl $0x80,%5 \n"
+ "ja 40f \n"
+
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrld $0x10,%%xmm6 \n"
+ "cvtdq2ps %%xmm6,%%xmm6 \n"
+ "addps %%xmm6,%%xmm5 \n"
+ "mulps %%xmm4,%%xmm5 \n"
+ "cvtps2dq %%xmm5,%%xmm5 \n"
+ "packssdw %%xmm5,%%xmm5 \n"
+
+ // 4 pixel small loop.
+ LABELALIGN
+ "4: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "psubd 0x00(%0,%4,4),%%xmm0 \n"
+ "psubd 0x10(%0,%4,4),%%xmm1 \n"
+ "psubd 0x20(%0,%4,4),%%xmm2 \n"
+ "psubd 0x30(%0,%4,4),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "psubd (%1),%%xmm0 \n"
+ "psubd 0x10(%1),%%xmm1 \n"
+ "psubd 0x20(%1),%%xmm2 \n"
+ "psubd 0x30(%1),%%xmm3 \n"
+ "paddd 0x00(%1,%4,4),%%xmm0 \n"
+ "paddd 0x10(%1,%4,4),%%xmm1 \n"
+ "paddd 0x20(%1,%4,4),%%xmm2 \n"
+ "paddd 0x30(%1,%4,4),%%xmm3 \n"
+ "lea 0x40(%1),%1 \n"
+ "packssdw %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm3,%%xmm2 \n"
+ "pmulhuw %%xmm5,%%xmm0 \n"
+ "pmulhuw %%xmm5,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jge 4b \n"
+ "jmp 49f \n"
+
+ // 4 pixel loop
+ LABELALIGN
+ "40: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "psubd 0x00(%0,%4,4),%%xmm0 \n"
+ "psubd 0x10(%0,%4,4),%%xmm1 \n"
+ "psubd 0x20(%0,%4,4),%%xmm2 \n"
+ "psubd 0x30(%0,%4,4),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "psubd (%1),%%xmm0 \n"
+ "psubd 0x10(%1),%%xmm1 \n"
+ "psubd 0x20(%1),%%xmm2 \n"
+ "psubd 0x30(%1),%%xmm3 \n"
+ "paddd 0x00(%1,%4,4),%%xmm0 \n"
+ "paddd 0x10(%1,%4,4),%%xmm1 \n"
+ "paddd 0x20(%1,%4,4),%%xmm2 \n"
+ "paddd 0x30(%1,%4,4),%%xmm3 \n"
+ "lea 0x40(%1),%1 \n"
+ "cvtdq2ps %%xmm0,%%xmm0 \n"
+ "cvtdq2ps %%xmm1,%%xmm1 \n"
+ "mulps %%xmm4,%%xmm0 \n"
+ "mulps %%xmm4,%%xmm1 \n"
+ "cvtdq2ps %%xmm2,%%xmm2 \n"
+ "cvtdq2ps %%xmm3,%%xmm3 \n"
+ "mulps %%xmm4,%%xmm2 \n"
+ "mulps %%xmm4,%%xmm3 \n"
+ "cvtps2dq %%xmm0,%%xmm0 \n"
+ "cvtps2dq %%xmm1,%%xmm1 \n"
+ "cvtps2dq %%xmm2,%%xmm2 \n"
+ "cvtps2dq %%xmm3,%%xmm3 \n"
+ "packssdw %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm3,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%3 \n"
+ "jl 19f \n"
+
+ // 1 pixel loop
+ LABELALIGN
+ "10: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "psubd 0x00(%0,%4,4),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "psubd (%1),%%xmm0 \n"
+ "paddd 0x00(%1,%4,4),%%xmm0 \n"
+ "lea 0x10(%1),%1 \n"
+ "cvtdq2ps %%xmm0,%%xmm0 \n"
+ "mulps %%xmm4,%%xmm0 \n"
+ "cvtps2dq %%xmm0,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "lea 0x4(%2),%2 \n"
+ "sub $0x1,%3 \n"
+ "jge 10b \n"
+ "19: \n"
+ : "+r"(topleft), // %0
+ "+r"(botleft), // %1
+ "+r"(dst), // %2
+ "+rm"(count) // %3
+ : "r"((intptr_t)(width)), // %4
+ "rm"(area) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
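// For reference, a scalar sketch of the row above (illustrative, not the
// libyuv C fallback verbatim): each table entry holds 4 int32 channel sums
// from a summed-area table, and every output channel is the box sum
// TL - TR - BL + BR scaled by the reciprocal of the box area. The 'width'
// argument is assumed to be the right-edge offset in int32 elements, matching
// the (%0,%4,4) addressing in the assembly.
#include <stdint.h>
static void CumulativeSumToAverage_C(const int32_t* topleft,
                                     const int32_t* botleft,
                                     int width, int area, uint8_t* dst,
                                     int count) {
  const float ooa = 1.0f / (float)area;  // reciprocal, as rcpss computes
  int i, c;
  for (i = 0; i < count; ++i) {
    for (c = 0; c < 4; ++c) {  // B, G, R, A channel sums
      int32_t sum =
          topleft[c] - topleft[width + c] - botleft[c] + botleft[width + c];
      dst[c] = (uint8_t)((float)sum * ooa);
    }
    topleft += 4;
    botleft += 4;
    dst += 4;
  }
}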
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb,
+void ARGBAffineRow_SSE2(const uint8_t* src_argb,
int src_argb_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
const float* src_dudv,
int width) {
intptr_t src_argb_stride_temp = src_argb_stride;
intptr_t temp;
- asm volatile (
- "movq " MEMACCESS(3) ",%%xmm2 \n"
- "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n"
- "shl $0x10,%1 \n"
- "add $0x4,%1 \n"
- "movd %1,%%xmm5 \n"
- "sub $0x4,%4 \n"
- "jl 49f \n"
-
- "pshufd $0x44,%%xmm7,%%xmm7 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "addps %%xmm7,%%xmm0 \n"
- "movlhps %%xmm0,%%xmm2 \n"
- "movdqa %%xmm7,%%xmm4 \n"
- "addps %%xmm4,%%xmm4 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "addps %%xmm4,%%xmm3 \n"
- "addps %%xmm4,%%xmm4 \n"
-
- // 4 pixel loop \n"
- LABELALIGN
- "40: \n"
- "cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2
- "cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2
- "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts
- "pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride
- "movd %%xmm0,%k1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k5 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
- MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
- "punpckldq %%xmm6,%%xmm1 \n"
- "addps %%xmm4,%%xmm2 \n"
- "movq %%xmm1," MEMACCESS(2) " \n"
- "movd %%xmm0,%k1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k5 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
- MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
- "punpckldq %%xmm6,%%xmm0 \n"
- "addps %%xmm4,%%xmm3 \n"
- "movq %%xmm0," MEMACCESS2(0x08,2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%4 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%4 \n"
- "jl 19f \n"
-
- // 1 pixel loop \n"
- LABELALIGN
- "10: \n"
- "cvttps2dq %%xmm2,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "pmaddwd %%xmm5,%%xmm0 \n"
- "addps %%xmm7,%%xmm2 \n"
- "movd %%xmm0,%k1 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x04,2) ",%2 \n"
- "sub $0x1,%4 \n"
- "jge 10b \n"
- "19: \n"
- : "+r"(src_argb), // %0
- "+r"(src_argb_stride_temp), // %1
- "+r"(dst_argb), // %2
- "+r"(src_dudv), // %3
- "+rm"(width), // %4
- "=&r"(temp) // %5
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ asm volatile(
+ "movq (%3),%%xmm2 \n"
+ "movq 0x08(%3),%%xmm7 \n"
+ "shl $0x10,%1 \n"
+ "add $0x4,%1 \n"
+ "movd %1,%%xmm5 \n"
+ "sub $0x4,%4 \n"
+ "jl 49f \n"
+
+ "pshufd $0x44,%%xmm7,%%xmm7 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "movdqa %%xmm2,%%xmm0 \n"
+ "addps %%xmm7,%%xmm0 \n"
+ "movlhps %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm7,%%xmm4 \n"
+ "addps %%xmm4,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "addps %%xmm4,%%xmm3 \n"
+ "addps %%xmm4,%%xmm4 \n"
+
+ // 4 pixel loop
+ LABELALIGN
+ "40: \n"
+ "cvttps2dq %%xmm2,%%xmm0 \n" // x,y float->int first 2
+ "cvttps2dq %%xmm3,%%xmm1 \n" // x,y float->int next 2
+ "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts
+ "pmaddwd %%xmm5,%%xmm0 \n" // off = x*4 + y*stride
+ "movd %%xmm0,%k1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+ "movd %%xmm0,%k5 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+ "movd 0x00(%0,%1,1),%%xmm1 \n"
+ "movd 0x00(%0,%5,1),%%xmm6 \n"
+ "punpckldq %%xmm6,%%xmm1 \n"
+ "addps %%xmm4,%%xmm2 \n"
+ "movq %%xmm1,(%2) \n"
+ "movd %%xmm0,%k1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+ "movd %%xmm0,%k5 \n"
+ "movd 0x00(%0,%1,1),%%xmm0 \n"
+ "movd 0x00(%0,%5,1),%%xmm6 \n"
+ "punpckldq %%xmm6,%%xmm0 \n"
+ "addps %%xmm4,%%xmm3 \n"
+ "movq %%xmm0,0x08(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%4 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%4 \n"
+ "jl 19f \n"
+
+ // 1 pixel loop
+ LABELALIGN
+ "10: \n"
+ "cvttps2dq %%xmm2,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "pmaddwd %%xmm5,%%xmm0 \n"
+ "addps %%xmm7,%%xmm2 \n"
+ "movd %%xmm0,%k1 \n"
+ "movd 0x00(%0,%1,1),%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "lea 0x04(%2),%2 \n"
+ "sub $0x1,%4 \n"
+ "jge 10b \n"
+ "19: \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_argb_stride_temp), // %1
+ "+r"(dst_argb), // %2
+ "+r"(src_dudv), // %3
+ "+rm"(width), // %4
+ "=&r"(temp) // %5
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBAFFINEROW_SSE2
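// A scalar sketch of the affine sampling above (illustrative): src_dudv is
// assumed to hold {u, v, du, dv}; each destination pixel copies the source
// texel at the truncated (u, v), then steps by (du, dv), matching the
// cvttps2dq truncation and the x*4 + y*stride offset in the assembly.
#include <stdint.h>
#include <string.h>
static void ARGBAffine_C(const uint8_t* src_argb, int src_argb_stride,
                         uint8_t* dst_argb, const float* src_dudv, int width) {
  float u = src_dudv[0];
  float v = src_dudv[1];
  int i;
  for (i = 0; i < width; ++i) {
    int x = (int)u;  // truncate toward zero, as cvttps2dq does
    int y = (int)v;
    memcpy(dst_argb + i * 4, src_argb + y * src_argb_stride + x * 4, 4);
    u += src_dudv[2];
    v += src_dudv[3];
  }
}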
#ifdef HAS_INTERPOLATEROW_SSSE3
// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSSE3(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x80,%3 \n"
- "je 50f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x100,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "mov $0x80808080,%%eax \n"
- "movd %%eax,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm2)
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "psubb %%xmm4,%%xmm0 \n"
- "psubb %%xmm4,%%xmm1 \n"
- "movdqa %%xmm5,%%xmm2 \n"
- "movdqa %%xmm5,%%xmm3 \n"
- "pmaddubsw %%xmm0,%%xmm2 \n"
- "pmaddubsw %%xmm1,%%xmm3 \n"
- "paddw %%xmm4,%%xmm2 \n"
- "paddw %%xmm4,%%xmm3 \n"
- "psrlw $0x8,%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- MEMOPMEM(movdqu,xmm2,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "jmp 99f \n"
+ asm volatile(
+ "sub %1,%0 \n"
+ "cmp $0x0,%3 \n"
+ "je 100f \n"
+ "cmp $0x80,%3 \n"
+ "je 50f \n"
+
+ "movd %3,%%xmm0 \n"
+ "neg %3 \n"
+ "add $0x100,%3 \n"
+ "movd %3,%%xmm5 \n"
+ "punpcklbw %%xmm0,%%xmm5 \n"
+ "punpcklwd %%xmm5,%%xmm5 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "movd %%eax,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+
+ // General purpose row blend.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu 0x00(%1,%4,1),%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "psubb %%xmm4,%%xmm0 \n"
+ "psubb %%xmm4,%%xmm1 \n"
+ "movdqa %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm5,%%xmm3 \n"
+ "pmaddubsw %%xmm0,%%xmm2 \n"
+ "pmaddubsw %%xmm1,%%xmm3 \n"
+ "paddw %%xmm4,%%xmm2 \n"
+ "paddw %%xmm4,%%xmm3 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm2,0x00(%1,%0,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "jmp 99f \n"
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+rm"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ // Blend 50 / 50.
+ LABELALIGN
+ "50: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu 0x00(%1,%4,1),%%xmm1 \n"
+ "pavgb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,0x00(%1,%0,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 50b \n"
+ "jmp 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ LABELALIGN
+ "100: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu %%xmm0,0x00(%1,%0,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 100b \n"
+
+ "99: \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+rm"(dst_width), // %2
+ "+r"(source_y_fraction) // %3
+ : "r"((intptr_t)(src_stride)) // %4
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_INTERPOLATEROW_SSSE3
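// A scalar sketch of the blend above (illustrative): each output byte is
// (src * (256 - f) + src1 * f + rounding) >> 8, where src1 is the row at
// src + stride and f is source_y_fraction in [0, 256). f == 0 takes the
// copy path and f == 128 the pavgb path in the assembly.
#include <stddef.h>
#include <stdint.h>
static void InterpolateRow_C_Sketch(uint8_t* dst, const uint8_t* src,
                                    ptrdiff_t stride, int width, int f) {
  const uint8_t* src1 = src + stride;
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src[x] * (256 - f) + src1[x] * f + 128) >> 8);
  }
}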
#ifdef HAS_INTERPOLATEROW_AVX2
// Bilinear filter 32x2 -> 32x1
-void InterpolateRow_AVX2(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
- asm volatile (
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "sub %1,%0 \n"
- "cmp $0x80,%3 \n"
- "je 50f \n"
-
- "vmovd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x100,%3 \n"
- "vmovd %3,%%xmm5 \n"
- "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
- "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
- "vbroadcastss %%xmm5,%%ymm5 \n"
- "mov $0x80808080,%%eax \n"
- "vmovd %%eax,%%xmm4 \n"
- "vbroadcastss %%xmm4,%%ymm4 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
- MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
- "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
- "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
- "vpsubb %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n"
- "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n"
- "vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "jmp 99f \n"
+ asm volatile(
+ "cmp $0x0,%3 \n"
+ "je 100f \n"
+ "sub %1,%0 \n"
+ "cmp $0x80,%3 \n"
+ "je 50f \n"
+
+ "vmovd %3,%%xmm0 \n"
+ "neg %3 \n"
+ "add $0x100,%3 \n"
+ "vmovd %3,%%xmm5 \n"
+ "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
+ "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
+ "vbroadcastss %%xmm5,%%ymm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "vmovd %%eax,%%xmm4 \n"
+ "vbroadcastss %%xmm4,%%ymm4 \n"
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
- VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0
- MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 50b \n"
- "jmp 99f \n"
+ // General purpose row blend.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%1),%%ymm0 \n"
+ "vmovdqu 0x00(%1,%4,1),%%ymm2 \n"
+ "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
+ "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpsubb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n"
+ "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n"
+ "vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%1,%0,1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "jmp 99f \n"
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "rep movsb " MEMMOVESTRING(1,0) " \n"
- "jmp 999f \n"
+ // Blend 50 / 50.
+ LABELALIGN
+ "50: \n"
+ "vmovdqu (%1),%%ymm0 \n"
+ "vpavgb 0x00(%1,%4,1),%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%1,%0,1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 50b \n"
+ "jmp 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ LABELALIGN
+ "100: \n"
+ "rep movsb \n"
+ "jmp 999f \n"
- "99: \n"
- "vzeroupper \n"
- "999: \n"
- : "+D"(dst_ptr), // %0
- "+S"(src_ptr), // %1
- "+cm"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"
- );
+ "99: \n"
+ "vzeroupper \n"
+ "999: \n"
+ : "+D"(dst_ptr), // %0
+ "+S"(src_ptr), // %1
+ "+cm"(dst_width), // %2
+ "+r"(source_y_fraction) // %3
+ : "r"((intptr_t)(src_stride)) // %4
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm4", "xmm5");
}
#endif // HAS_INTERPOLATEROW_AVX2
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
- asm volatile (
- "movdqu " MEMACCESS(3) ",%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
+ asm volatile(
+
+ "movdqu (%3),%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pshufb %%xmm5,%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(shuffler) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_ARGBSHUFFLEROW_SSSE3
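// A scalar sketch of the shuffle above (illustrative): the first four bytes
// of 'shuffler' give the source channel index for each destination channel,
// so e.g. {3, 2, 1, 0, ...} reverses the byte order of every pixel; pshufb
// applies the same pattern to each 4-byte group.
#include <stdint.h>
static void ARGBShuffle_C(const uint8_t* src_argb, uint8_t* dst_argb,
                          const uint8_t* shuffler, int width) {
  const int i0 = shuffler[0];
  const int i1 = shuffler[1];
  const int i2 = shuffler[2];
  const int i3 = shuffler[3];
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_argb[i0];
    dst_argb[1] = src_argb[i1];
    dst_argb[2] = src_argb[i2];
    dst_argb[3] = src_argb[i3];
    src_argb += 4;
    dst_argb += 4;
  }
}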
#ifdef HAS_ARGBSHUFFLEROW_AVX2
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
- asm volatile (
- "vbroadcastf128 " MEMACCESS(3) ",%%ymm5 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
- "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
+ asm volatile(
+
+ "vbroadcastf128 (%3),%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(shuffler) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_ARGBSHUFFLEROW_AVX2
#ifdef HAS_I422TOYUY2ROW_SSE2
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame,
+void I422ToYUY2Row_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
int width) {
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(1) ",%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(3) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,3) " \n"
- "lea " MEMLEA(0x20,3) ",%3 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_frame), // %3
- "+rm"(width) // %4
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%1),%%xmm2 \n"
+ "movq 0x00(%1,%2,1),%%xmm1 \n"
+ "add $0x8,%1 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "movdqu (%0),%%xmm0 \n"
+ "add $0x10,%0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "movdqu %%xmm0,(%3) \n"
+ "movdqu %%xmm1,0x10(%3) \n"
+ "lea 0x20(%3),%3 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_yuy2), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_I422TOYUY2ROW_SSE2
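// A scalar sketch of the packing above (illustrative, even widths assumed):
// YUY2 interleaves one U/V pair per two Y samples as Y0 U0 Y1 V0.
#include <stdint.h>
static void I422ToYUY2_C(const uint8_t* src_y, const uint8_t* src_u,
                         const uint8_t* src_v, uint8_t* dst_yuy2, int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    dst_yuy2[0] = src_y[0];
    dst_yuy2[1] = src_u[0];
    dst_yuy2[2] = src_y[1];
    dst_yuy2[3] = src_v[0];
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_yuy2 += 4;
  }
}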
#ifdef HAS_I422TOUYVYROW_SSE2
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame,
+void I422ToUYVYRow_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width) {
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(1) ",%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpckhbw %%xmm0,%%xmm2 \n"
- "movdqu %%xmm1," MEMACCESS(3) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,3) " \n"
- "lea " MEMLEA(0x20,3) ",%3 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_frame), // %3
- "+rm"(width) // %4
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%1),%%xmm2 \n"
+ "movq 0x00(%1,%2,1),%%xmm1 \n"
+ "add $0x8,%1 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "add $0x10,%0 \n"
+ "punpcklbw %%xmm0,%%xmm1 \n"
+ "punpckhbw %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm1,(%3) \n"
+ "movdqu %%xmm2,0x10(%3) \n"
+ "lea 0x20(%3),%3 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_uyvy), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_I422TOUYVYROW_SSE2
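// UYVY differs from YUY2 only in byte order; a scalar sketch (illustrative,
// even widths assumed) packs U0 Y0 V0 Y1 per pair of Y samples.
#include <stdint.h>
static void I422ToUYVY_C(const uint8_t* src_y, const uint8_t* src_u,
                         const uint8_t* src_v, uint8_t* dst_uyvy, int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    dst_uyvy[0] = src_u[0];
    dst_uyvy[1] = src_y[0];
    dst_uyvy[2] = src_v[0];
    dst_uyvy[3] = src_y[1];
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_uyvy += 4;
  }
}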
+#ifdef HAS_I422TOYUY2ROW_AVX2
+void I422ToYUY2Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbw (%1),%%ymm1 \n"
+ "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
+ "add $0x10,%1 \n"
+ "vpsllw $0x8,%%ymm2,%%ymm2 \n"
+ "vpor %%ymm1,%%ymm2,%%ymm2 \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "add $0x20,%0 \n"
+ "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
+ "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
+ "vextractf128 $0x0,%%ymm1,(%3) \n"
+ "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
+ "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
+ "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
+ "lea 0x40(%3),%3 \n"
+ "sub $0x20,%4 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_yuy2), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_I422TOYUY2ROW_AVX2
+
+#ifdef HAS_I422TOUYVYROW_AVX2
+void I422ToUYVYRow_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbw (%1),%%ymm1 \n"
+ "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
+ "add $0x10,%1 \n"
+ "vpsllw $0x8,%%ymm2,%%ymm2 \n"
+ "vpor %%ymm1,%%ymm2,%%ymm2 \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "add $0x20,%0 \n"
+ "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
+ "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
+ "vextractf128 $0x0,%%ymm1,(%3) \n"
+ "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
+ "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
+ "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
+ "lea 0x40(%3),%3 \n"
+ "sub $0x20,%4 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_uyvy), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_I422TOUYVYROW_AVX2
+
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
int width) {
- asm volatile (
- "pxor %%xmm3,%%xmm3 \n"
+ asm volatile(
- // 2 pixel loop.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm3,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "punpcklwd %%xmm3,%%xmm0 \n"
- "punpckhwd %%xmm3,%%xmm4 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "cvtdq2ps %%xmm4,%%xmm4 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "mulps " MEMACCESS2(0x10,3) ",%%xmm0 \n"
- "mulps " MEMACCESS2(0x10,3) ",%%xmm4 \n"
- "addps " MEMACCESS(3) ",%%xmm0 \n"
- "addps " MEMACCESS(3) ",%%xmm4 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "movdqa %%xmm5,%%xmm6 \n"
- "mulps %%xmm1,%%xmm2 \n"
- "mulps %%xmm5,%%xmm6 \n"
- "mulps %%xmm2,%%xmm1 \n"
- "mulps %%xmm6,%%xmm5 \n"
- "mulps " MEMACCESS2(0x20,3) ",%%xmm2 \n"
- "mulps " MEMACCESS2(0x20,3) ",%%xmm6 \n"
- "mulps " MEMACCESS2(0x30,3) ",%%xmm1 \n"
- "mulps " MEMACCESS2(0x30,3) ",%%xmm5 \n"
- "addps %%xmm2,%%xmm0 \n"
- "addps %%xmm6,%%xmm4 \n"
- "addps %%xmm1,%%xmm0 \n"
- "addps %%xmm5,%%xmm4 \n"
- "cvttps2dq %%xmm0,%%xmm0 \n"
- "cvttps2dq %%xmm4,%%xmm4 \n"
- "packuswb %%xmm4,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x2,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(poly) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ "pxor %%xmm3,%%xmm3 \n"
+
+ // 2 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm3,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm4 \n"
+ "punpcklwd %%xmm3,%%xmm0 \n"
+ "punpckhwd %%xmm3,%%xmm4 \n"
+ "cvtdq2ps %%xmm0,%%xmm0 \n"
+ "cvtdq2ps %%xmm4,%%xmm4 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "mulps 0x10(%3),%%xmm0 \n"
+ "mulps 0x10(%3),%%xmm4 \n"
+ "addps (%3),%%xmm0 \n"
+ "addps (%3),%%xmm4 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "movdqa %%xmm5,%%xmm6 \n"
+ "mulps %%xmm1,%%xmm2 \n"
+ "mulps %%xmm5,%%xmm6 \n"
+ "mulps %%xmm2,%%xmm1 \n"
+ "mulps %%xmm6,%%xmm5 \n"
+ "mulps 0x20(%3),%%xmm2 \n"
+ "mulps 0x20(%3),%%xmm6 \n"
+ "mulps 0x30(%3),%%xmm1 \n"
+ "mulps 0x30(%3),%%xmm5 \n"
+ "addps %%xmm2,%%xmm0 \n"
+ "addps %%xmm6,%%xmm4 \n"
+ "addps %%xmm1,%%xmm0 \n"
+ "addps %%xmm5,%%xmm4 \n"
+ "cvttps2dq %%xmm0,%%xmm0 \n"
+ "cvttps2dq %%xmm4,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x2,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(poly) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
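// A scalar sketch of the polynomial above (illustrative): 'poly' is assumed
// to hold 16 floats, four per coefficient C0..C3 (one per channel), and each
// channel value v maps to C0 + C1*v + C2*v^2 + C3*v^3, clamped to a byte as
// the packing instructions do.
#include <stdint.h>
static void ARGBPolynomial_C(const uint8_t* src_argb, uint8_t* dst_argb,
                             const float* poly, int width) {
  int i, c;
  for (i = 0; i < width; ++i) {
    for (c = 0; c < 4; ++c) {
      float v = (float)src_argb[c];
      float r = poly[c] + poly[c + 4] * v + poly[c + 8] * v * v +
                poly[c + 12] * v * v * v;
      if (r < 0.f) r = 0.f;
      if (r > 255.f) r = 255.f;
      dst_argb[c] = (uint8_t)r;
    }
    src_argb += 4;
    dst_argb += 4;
  }
}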
#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
int width) {
- asm volatile (
- "vbroadcastf128 " MEMACCESS(3) ",%%ymm4 \n"
- "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n"
- "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n"
- "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n"
+ asm volatile(
+ "vbroadcastf128 (%3),%%ymm4 \n"
+ "vbroadcastf128 0x10(%3),%%ymm5 \n"
+ "vbroadcastf128 0x20(%3),%%ymm6 \n"
+ "vbroadcastf128 0x30(%3),%%ymm7 \n"
- // 2 pixel loop.
- LABELALIGN
- "1: \n"
- "vpmovzxbd " MEMACCESS(0) ",%%ymm0 \n" // 2 ARGB pixels
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats
- "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X
- "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X
- "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X
- "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X
- "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * X
- "vcvttps2dq %%ymm0,%%ymm0 \n"
- "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n"
- "vmovq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x2,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(poly) // %3
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ // 2 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbd (%0),%%ymm0 \n" // 2 ARGB pixels
+ "lea 0x8(%0),%0 \n"
+ "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats
+ "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X
+ "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X
+ "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X
+ "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X
+ "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X *
+ // X
+ "vcvttps2dq %%ymm0,%%ymm0 \n"
+ "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n"
+ "vmovq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x2,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(poly) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBPOLYNOMIALROW_AVX2
#ifdef HAS_HALFFLOATROW_SSE2
static float kScaleBias = 1.9259299444e-34f;
-void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
+void HalfFloatRow_SSE2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
scale *= kScaleBias;
- asm volatile (
- "movd %3,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
- "sub %0,%1 \n"
+ asm volatile(
+ "movd %3,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+ "sub %0,%1 \n"
- // 16 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n" // 8 shorts
- "add $0x10,%0 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/1
- "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats
- "punpckhwd %%xmm5,%%xmm3 \n"
- "cvtdq2ps %%xmm3,%%xmm3 \n"
- "mulps %%xmm4,%%xmm2 \n"
- "mulps %%xmm4,%%xmm3 \n"
- "psrld $0xd,%%xmm2 \n"
- "psrld $0xd,%%xmm3 \n"
- "packssdw %%xmm3,%%xmm2 \n"
- MEMOPMEM(movdqu,xmm2,-0x10,0,1,1)
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "m"(scale) // %3
- : "memory", "cc",
- "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm2 \n" // 8 shorts
+ "add $0x10,%0 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/1
+ "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats
+ "punpckhwd %%xmm5,%%xmm3 \n"
+ "cvtdq2ps %%xmm3,%%xmm3 \n"
+ "mulps %%xmm4,%%xmm2 \n"
+ "mulps %%xmm4,%%xmm3 \n"
+ "psrld $0xd,%%xmm2 \n"
+ "psrld $0xd,%%xmm3 \n"
+ "packssdw %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm2,-0x10(%0,%1,1) \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(scale) // %3
+ : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_HALFFLOATROW_SSE2
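// The kScaleBias constant above is 2^-112: multiplying by it rebiases the
// single-precision exponent (bias 127) toward the half-precision one
// (bias 15), so after the 13-bit right shift the low 16 bits hold the IEEE
// half encoding. A scalar sketch of one element (illustrative, positive
// inputs within half range assumed):
#include <stdint.h>
#include <string.h>
static uint16_t HalfFloat1_C(uint16_t v, float scale) {
  float f = (float)v * scale * 1.9259299444e-34f;  // * 2^-112 rebias
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));  // type-pun without UB
  return (uint16_t)(bits >> 13);    // mantissa/exponent land in half position
}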
#ifdef HAS_HALFFLOATROW_AVX2
-void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
+void HalfFloatRow_AVX2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
scale *= kScaleBias;
- asm volatile (
- "vbroadcastss %3, %%ymm4 \n"
- "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
- "sub %0,%1 \n"
+ asm volatile(
+ "vbroadcastss %3, %%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+ "sub %0,%1 \n"
- // 16 pixel loop.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts
- "add $0x20,%0 \n"
- "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates
- "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n"
- "vcvtdq2ps %%ymm3,%%ymm3 \n"
- "vcvtdq2ps %%ymm2,%%ymm2 \n"
- "vmulps %%ymm3,%%ymm4,%%ymm3 \n"
- "vmulps %%ymm2,%%ymm4,%%ymm2 \n"
- "vpsrld $0xd,%%ymm3,%%ymm3 \n"
- "vpsrld $0xd,%%ymm2,%%ymm2 \n"
- "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates
- MEMOPMEM(vmovdqu,ymm2,-0x20,0,1,1)
- "sub $0x10,%2 \n"
- "jg 1b \n"
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm2 \n" // 16 shorts
+ "add $0x20,%0 \n"
+ "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates
+ "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n"
+ "vcvtdq2ps %%ymm3,%%ymm3 \n"
+ "vcvtdq2ps %%ymm2,%%ymm2 \n"
+ "vmulps %%ymm3,%%ymm4,%%ymm3 \n"
+ "vmulps %%ymm2,%%ymm4,%%ymm2 \n"
+ "vpsrld $0xd,%%ymm3,%%ymm3 \n"
+ "vpsrld $0xd,%%ymm2,%%ymm2 \n"
+ "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates
+ "vmovdqu %%ymm2,-0x20(%0,%1,1) \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
#if defined(__x86_64__)
- : "x"(scale) // %3
+ : "x"(scale) // %3
#else
- : "m"(scale) // %3
+ : "m"(scale) // %3
#endif
- : "memory", "cc",
- "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_HALFFLOATROW_AVX2
#ifdef HAS_HALFFLOATROW_F16C
-void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
- asm volatile (
- "vbroadcastss %3, %%ymm4 \n"
- "sub %0,%1 \n"
+void HalfFloatRow_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ asm volatile(
+ "vbroadcastss %3, %%ymm4 \n"
+ "sub %0,%1 \n"
- // 16 pixel loop.
- LABELALIGN
- "1: \n"
- "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints
- "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n"
- "vcvtdq2ps %%ymm2,%%ymm2 \n"
- "vcvtdq2ps %%ymm3,%%ymm3 \n"
- "vmulps %%ymm2,%%ymm4,%%ymm2 \n"
- "vmulps %%ymm3,%%ymm4,%%ymm3 \n"
- "vcvtps2ph $3, %%ymm2, %%xmm2 \n"
- "vcvtps2ph $3, %%ymm3, %%xmm3 \n"
- MEMOPMEM(vmovdqu,xmm2,0x00,0,1,1)
- MEMOPMEM(vmovdqu,xmm3,0x10,0,1,1)
- "add $0x20,%0 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints
+ "vpmovzxwd 0x10(%0),%%ymm3 \n"
+ "vcvtdq2ps %%ymm2,%%ymm2 \n"
+ "vcvtdq2ps %%ymm3,%%ymm3 \n"
+ "vmulps %%ymm2,%%ymm4,%%ymm2 \n"
+ "vmulps %%ymm3,%%ymm4,%%ymm3 \n"
+ "vcvtps2ph $3, %%ymm2, %%xmm2 \n"
+ "vcvtps2ph $3, %%ymm3, %%xmm3 \n"
+ "vmovdqu %%xmm2,0x00(%0,%1,1) \n"
+ "vmovdqu %%xmm3,0x10(%0,%1,1) \n"
+ "add $0x20,%0 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
#if defined(__x86_64__)
- : "x"(scale) // %3
+ : "x"(scale) // %3
#else
- : "m"(scale) // %3
+ : "m"(scale) // %3
#endif
- : "memory", "cc",
- "xmm2", "xmm3", "xmm4"
- );
+ : "memory", "cc", "xmm2", "xmm3", "xmm4");
}
#endif // HAS_HALFFLOATROW_F16C
#ifdef HAS_HALFFLOATROW_F16C
-void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) {
- asm volatile (
- "sub %0,%1 \n"
- // 16 pixel loop.
- LABELALIGN
- "1: \n"
- "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints
- "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n"
- "vcvtdq2ps %%ymm2,%%ymm2 \n"
- "vcvtdq2ps %%ymm3,%%ymm3 \n"
- "vcvtps2ph $3, %%ymm2, %%xmm2 \n"
- "vcvtps2ph $3, %%ymm3, %%xmm3 \n"
- MEMOPMEM(vmovdqu,xmm2,0x00,0,1,1)
- MEMOPMEM(vmovdqu,xmm3,0x10,0,1,1)
- "add $0x20,%0 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc",
- "xmm2", "xmm3"
- );
+void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float, int width) {
+ asm volatile(
+ "sub %0,%1 \n"
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints
+ "vpmovzxwd 0x10(%0),%%ymm3 \n"
+ "vcvtdq2ps %%ymm2,%%ymm2 \n"
+ "vcvtdq2ps %%ymm3,%%ymm3 \n"
+ "vcvtps2ph $3, %%ymm2, %%xmm2 \n"
+ "vcvtps2ph $3, %%ymm3, %%xmm3 \n"
+ "vmovdqu %%xmm2,0x00(%0,%1,1) \n"
+ "vmovdqu %%xmm3,0x10(%0,%1,1) \n"
+ "add $0x20,%0 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm2", "xmm3");
}
#endif // HAS_HALFFLOATROW_F16C
#ifdef HAS_ARGBCOLORTABLEROW_X86
// Transform ARGB pixels with color table.
-void ARGBColorTableRow_X86(uint8* dst_argb,
- const uint8* table_argb,
+void ARGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
int width) {
uintptr_t pixel_temp;
- asm volatile (
- // 1 pixel loop.
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(0) ",%1 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x4,0) " \n"
- "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
- MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x3,0) " \n"
- "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
- MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x2,0) " \n"
- "movzb " MEMACCESS2(-0x1,0) ",%1 \n"
- MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x1,0) " \n"
- "dec %2 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "=&d"(pixel_temp), // %1
- "+r"(width) // %2
- : "r"(table_argb) // %3
- : "memory", "cc");
+ asm volatile(
+ // 1 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movzb (%0),%1 \n"
+ "lea 0x4(%0),%0 \n"
+ "movzb 0x00(%3,%1,4),%1 \n"
+ "mov %b1,-0x4(%0) \n"
+ "movzb -0x3(%0),%1 \n"
+ "movzb 0x01(%3,%1,4),%1 \n"
+ "mov %b1,-0x3(%0) \n"
+ "movzb -0x2(%0),%1 \n"
+ "movzb 0x02(%3,%1,4),%1 \n"
+ "mov %b1,-0x2(%0) \n"
+ "movzb -0x1(%0),%1 \n"
+ "movzb 0x03(%3,%1,4),%1 \n"
+ "mov %b1,-0x1(%0) \n"
+ "dec %2 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "=&d"(pixel_temp), // %1
+ "+r"(width) // %2
+ : "r"(table_argb) // %3
+ : "memory", "cc");
}
#endif // HAS_ARGBCOLORTABLEROW_X86
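// A scalar sketch of the lookup above (illustrative): table_argb is four
// interleaved 256-entry tables, indexed as table_argb[v * 4 + channel], and
// the transform is applied in place.
#include <stdint.h>
static void ARGBColorTable_C(uint8_t* dst_argb, const uint8_t* table_argb,
                             int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = table_argb[dst_argb[0] * 4 + 0];
    dst_argb[1] = table_argb[dst_argb[1] * 4 + 1];
    dst_argb[2] = table_argb[dst_argb[2] * 4 + 2];
    dst_argb[3] = table_argb[dst_argb[3] * 4 + 3];
    dst_argb += 4;
  }
}
// The RGBColorTableRow variant below applies the same lookup to B, G, R but
// leaves the alpha byte untouched.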
#ifdef HAS_RGBCOLORTABLEROW_X86
// Transform RGB pixels with color table.
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
+void RGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
uintptr_t pixel_temp;
- asm volatile (
- // 1 pixel loop.
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(0) ",%1 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x4,0) " \n"
- "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
- MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x3,0) " \n"
- "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
- MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x2,0) " \n"
- "dec %2 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "=&d"(pixel_temp), // %1
- "+r"(width) // %2
- : "r"(table_argb) // %3
- : "memory", "cc");
+ asm volatile(
+ // 1 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movzb (%0),%1 \n"
+ "lea 0x4(%0),%0 \n"
+ "movzb 0x00(%3,%1,4),%1 \n"
+ "mov %b1,-0x4(%0) \n"
+ "movzb -0x3(%0),%1 \n"
+ "movzb 0x01(%3,%1,4),%1 \n"
+ "mov %b1,-0x3(%0) \n"
+ "movzb -0x2(%0),%1 \n"
+ "movzb 0x02(%3,%1,4),%1 \n"
+ "mov %b1,-0x2(%0) \n"
+ "dec %2 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "=&d"(pixel_temp), // %1
+ "+r"(width) // %2
+ : "r"(table_argb) // %3
+ : "memory", "cc");
}
#endif // HAS_RGBCOLORTABLEROW_X86
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Transform RGB pixels with luma table.
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- const uint8* luma,
- uint32 lumacoeff) {
+ const uint8_t* luma,
+ uint32_t lumacoeff) {
uintptr_t pixel_temp;
uintptr_t table_temp;
- asm volatile (
- "movd %6,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0x8,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
+ asm volatile(
+ "movd %6,%%xmm3 \n"
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psllw $0x8,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(2) ",%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "phaddw %%xmm0,%%xmm0 \n"
- "pand %%xmm4,%%xmm0 \n"
- "punpcklwd %%xmm5,%%xmm0 \n"
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS(2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS(3) " \n"
- "movzb " MEMACCESS2(0x1,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x1,3) " \n"
- "movzb " MEMACCESS2(0x2,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x2,3) " \n"
- "movzb " MEMACCESS2(0x3,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0x3,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS2(0x4,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x4,3) " \n"
- "movzb " MEMACCESS2(0x5,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x5,3) " \n"
- "movzb " MEMACCESS2(0x6,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x6,3) " \n"
- "movzb " MEMACCESS2(0x7,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0x7,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS2(0x8,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x8,3) " \n"
- "movzb " MEMACCESS2(0x9,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x9,3) " \n"
- "movzb " MEMACCESS2(0xa,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xa,3) " \n"
- "movzb " MEMACCESS2(0xb,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0xb,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
-
- "movzb " MEMACCESS2(0xc,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xc,3) " \n"
- "movzb " MEMACCESS2(0xd,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xd,3) " \n"
- "movzb " MEMACCESS2(0xe,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xe,3) " \n"
- "movzb " MEMACCESS2(0xf,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0xf,3) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "lea " MEMLEA(0x10,3) ",%3 \n"
- "sub $0x4,%4 \n"
- "jg 1b \n"
- : "=&d"(pixel_temp), // %0
- "=&a"(table_temp), // %1
- "+r"(src_argb), // %2
- "+r"(dst_argb), // %3
- "+rm"(width) // %4
- : "r"(luma), // %5
- "rm"(lumacoeff) // %6
- : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"
- );
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%2),%%xmm0 \n"
+ "pmaddubsw %%xmm3,%%xmm0 \n"
+ "phaddw %%xmm0,%%xmm0 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "punpcklwd %%xmm5,%%xmm0 \n"
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+
+ "movzb (%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,(%3) \n"
+ "movzb 0x1(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x1(%3) \n"
+ "movzb 0x2(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x2(%3) \n"
+ "movzb 0x3(%2),%0 \n"
+ "mov %b0,0x3(%3) \n"
+
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+
+ "movzb 0x4(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x4(%3) \n"
+ "movzb 0x5(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x5(%3) \n"
+ "movzb 0x6(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x6(%3) \n"
+ "movzb 0x7(%2),%0 \n"
+ "mov %b0,0x7(%3) \n"
+
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+
+ "movzb 0x8(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x8(%3) \n"
+ "movzb 0x9(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x9(%3) \n"
+ "movzb 0xa(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xa(%3) \n"
+ "movzb 0xb(%2),%0 \n"
+ "mov %b0,0xb(%3) \n"
+
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+
+ "movzb 0xc(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xc(%3) \n"
+ "movzb 0xd(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xd(%3) \n"
+ "movzb 0xe(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xe(%3) \n"
+ "movzb 0xf(%2),%0 \n"
+ "mov %b0,0xf(%3) \n"
+ "lea 0x10(%2),%2 \n"
+ "lea 0x10(%3),%3 \n"
+ "sub $0x4,%4 \n"
+ "jg 1b \n"
+ : "=&d"(pixel_temp), // %0
+ "=&a"(table_temp), // %1
+ "+r"(src_argb), // %2
+ "+r"(dst_argb), // %3
+ "+rm"(width) // %4
+ : "r"(luma), // %5
+ "rm"(lumacoeff) // %6
+ : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
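// A scalar sketch of the luma lookup above (illustrative): 'lumacoeff' is
// assumed to pack per-channel byte weights (B in the low byte); the weighted
// B/G/R sum, masked to a multiple of 256 as the pand with 0xFF00 words does,
// selects a row of the 'luma' table, and B, G, R are remapped through that
// row while alpha passes through unchanged.
#include <stdint.h>
static void ARGBLumaColorTable_C(const uint8_t* src_argb, uint8_t* dst_argb,
                                 int width, const uint8_t* luma,
                                 uint32_t lumacoeff) {
  const uint32_t bc = lumacoeff & 0xff;
  const uint32_t gc = (lumacoeff >> 8) & 0xff;
  const uint32_t rc = (lumacoeff >> 16) & 0xff;
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* row =
        luma + ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) &
                0xff00u);
    dst_argb[0] = row[src_argb[0]];
    dst_argb[1] = row[src_argb[1]];
    dst_argb[2] = row[src_argb[2]];
    dst_argb[3] = src_argb[3];  // alpha unchanged
    src_argb += 4;
    dst_argb += 4;
  }
}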
diff --git a/chromium/third_party/libyuv/source/row_msa.cc b/chromium/third_party/libyuv/source/row_msa.cc
index 5cc23450a52..66666cefcd9 100644
--- a/chromium/third_party/libyuv/source/row_msa.cc
+++ b/chromium/third_party/libyuv/source/row_msa.cc
@@ -37,17 +37,17 @@ extern "C" {
}
// Load YUV 422 pixel data
-#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
- { \
- uint64 y_m; \
- uint32 u_m, v_m; \
- v4i32 zero_m = {0}; \
- y_m = LD(psrc_y); \
- u_m = LW(psrc_u); \
- v_m = LW(psrc_v); \
- out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64)y_m); \
- out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32)u_m); \
- out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32)v_m); \
+#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
+ { \
+ uint64_t y_m; \
+ uint32_t u_m, v_m; \
+ v4i32 zero_m = {0}; \
+ y_m = LD(psrc_y); \
+ u_m = LW(psrc_u); \
+ v_m = LW(psrc_v); \
+ out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \
+ out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m); \
+ out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m); \
}
// Clip input vector elements between 0 to 255
@@ -275,17 +275,17 @@ extern "C" {
// Load I444 pixel data
#define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
{ \
- uint64 y_m, u_m, v_m; \
+ uint64_t y_m, u_m, v_m; \
v2i64 zero_m = {0}; \
y_m = LD(psrc_y); \
u_m = LD(psrc_u); \
v_m = LD(psrc_v); \
- out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64)y_m); \
- out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64)u_m); \
- out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64)v_m); \
+ out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m); \
+ out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m); \
+ out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m); \
}
-void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
+void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
int x;
v16u8 src0, src1, src2, src3;
v16u8 dst0, dst1, dst2, dst3;
@@ -302,7 +302,7 @@ void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
}
}
-void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) {
+void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
int x;
v16u8 src0, src1, src2, src3;
v16u8 dst0, dst1, dst2, dst3;
@@ -319,10 +319,10 @@ void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) {
}
}
-void I422ToYUY2Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
int width) {
int x;
v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;
@@ -343,10 +343,10 @@ void I422ToYUY2Row_MSA(const uint8* src_y,
}
}
-void I422ToUYVYRow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToUYVYRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width) {
int x;
v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;
@@ -367,10 +367,10 @@ void I422ToUYVYRow_MSA(const uint8* src_y,
}
}
-void I422ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I422ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -390,18 +390,18 @@ void I422ToARGBRow_MSA(const uint8* src_y,
src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
vec0, vec1, vec2);
- STOREARGB(vec0, vec1, vec2, alpha, rgb_buf);
+ STOREARGB(vec0, vec1, vec2, alpha, dst_argb);
src_y += 8;
src_u += 4;
src_v += 4;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void I422ToRGBARow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I422ToRGBARow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -421,23 +421,23 @@ void I422ToRGBARow_MSA(const uint8* src_y,
src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
vec0, vec1, vec2);
- STOREARGB(alpha, vec0, vec1, vec2, rgb_buf);
+ STOREARGB(alpha, vec0, vec1, vec2, dst_argb);
src_y += 8;
src_u += 4;
src_v += 4;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void I422AlphaToARGBRow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- const uint8* src_a,
- uint8* rgb_buf,
+void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
- int64 data_a;
+ int64_t data_a;
v16u8 src0, src1, src2, src3;
v8i16 vec0, vec1, vec2;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
@@ -457,23 +457,23 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y,
YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
vec0, vec1, vec2);
src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3);
- STOREARGB(vec0, vec1, vec2, src3, rgb_buf);
+ STOREARGB(vec0, vec1, vec2, src3, dst_argb);
src_y += 8;
src_u += 4;
src_v += 4;
src_a += 8;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void I422ToRGB24Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I422ToRGB24Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
- int32 width) {
+ int32_t width) {
int x;
- int64 data_u, data_v;
+ int64_t data_u, data_v;
v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2;
v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
@@ -510,20 +510,20 @@ void I422ToRGB24Row_MSA(const uint8* src_y,
dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0);
dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1);
dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2);
- ST_UB2(dst0, dst1, rgb_buf, 16);
- ST_UB(dst2, (rgb_buf + 32));
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ ST_UB(dst2, (dst_argb + 32));
src_y += 16;
src_u += 8;
src_v += 8;
- rgb_buf += 48;
+ dst_argb += 48;
}
}
// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
-void I422ToRGB565Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -558,10 +558,10 @@ void I422ToRGB565Row_MSA(const uint8* src_y,
}
// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G.
-void I422ToARGB4444Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -598,10 +598,10 @@ void I422ToARGB4444Row_MSA(const uint8* src_y,
}
}
-void I422ToARGB1555Row_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -638,7 +638,7 @@ void I422ToARGB1555Row_MSA(const uint8* src_y,
}
}
-void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {
+void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -652,12 +652,12 @@ void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {
}
}
-void YUY2ToUVRow_MSA(const uint8* src_yuy2,
+void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
int src_stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_yuy2_next = src_yuy2 + src_stride_yuy2;
+ const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2;
int x;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 vec0, vec1, dst0, dst1;
@@ -682,9 +682,9 @@ void YUY2ToUVRow_MSA(const uint8* src_yuy2,
}
}
-void YUY2ToUV422Row_MSA(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -703,7 +703,7 @@ void YUY2ToUV422Row_MSA(const uint8* src_yuy2,
}
}
-void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width) {
+void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -717,12 +717,12 @@ void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width) {
}
}
-void UYVYToUVRow_MSA(const uint8* src_uyvy,
+void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
int src_stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_uyvy_next = src_uyvy + src_stride_uyvy;
+ const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy;
int x;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 vec0, vec1, dst0, dst1;
@@ -747,9 +747,9 @@ void UYVYToUVRow_MSA(const uint8* src_uyvy,
}
}
-void UYVYToUV422Row_MSA(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -768,7 +768,7 @@ void UYVYToUV422Row_MSA(const uint8* src_uyvy,
}
}
-void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
+void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@@ -814,13 +814,13 @@ void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
}
}
-void ARGBToUVRow_MSA(const uint8* src_argb0,
+void ARGBToUVRow_MSA(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint8* src_argb0_next = src_argb0 + src_stride_argb;
+ const uint8_t* src_argb0_next = src_argb0 + src_stride_argb;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
@@ -932,7 +932,7 @@ void ARGBToUVRow_MSA(const uint8* src_argb0,
}
}
-void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20};
@@ -956,7 +956,7 @@ void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22};
@@ -980,7 +980,7 @@ void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
v16u8 src0, src1, dst0;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
@@ -1014,7 +1014,9 @@ void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
int x;
v16u8 src0, src1, dst0;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
@@ -1054,7 +1056,9 @@ void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
+void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
int x;
v16u8 src0, src1;
v16u8 vec0, vec1;
@@ -1077,11 +1081,11 @@ void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
-void ARGBToUV444Row_MSA(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
- int32 width) {
- int32 x;
+void ARGBToUV444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int32_t width) {
+ int32_t x;
v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1;
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
v8u16 vec8, vec9, vec10, vec11;
@@ -1149,9 +1153,9 @@ void ARGBToUV444Row_MSA(const uint8* src_argb,
}
}
-void ARGBMultiplyRow_MSA(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int x;
v16u8 src0, src1, dst0;
@@ -1188,9 +1192,9 @@ void ARGBMultiplyRow_MSA(const uint8* src_argb0,
}
}
-void ARGBAddRow_MSA(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -1209,9 +1213,9 @@ void ARGBAddRow_MSA(const uint8* src_argb0,
}
}
-void ARGBSubtractRow_MSA(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -1230,7 +1234,9 @@ void ARGBSubtractRow_MSA(const uint8* src_argb0,
}
}
-void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
int x;
v16u8 src0, src1, dst0, dst1;
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
@@ -1295,9 +1301,9 @@ void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) {
}
}
-void ARGBToRGB565DitherRow_MSA(const uint8* src_argb,
- uint8* dst_rgb,
- uint32 dither4,
+void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ uint32_t dither4,
int width) {
int x;
v16u8 src0, src1, dst0, vec0, vec1;
@@ -1339,15 +1345,15 @@ void ARGBToRGB565DitherRow_MSA(const uint8* src_argb,
}
}
-void ARGBShuffleRow_MSA(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
int x;
v16u8 src0, src1, dst0, dst1;
v16i8 vec0;
v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
- int32 val = LW((int32*)shuffler);
+ int32_t val = LW((int32_t*)shuffler);
vec0 = (v16i8)__msa_fill_w(val);
shuffler_vec += vec0;
@@ -1363,10 +1369,10 @@ void ARGBShuffleRow_MSA(const uint8* src_argb,
}
}
-void ARGBShadeRow_MSA(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBShadeRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value) {
+ uint32_t value) {
int x;
v16u8 src0, dst0;
v8u16 vec0, vec1;
@@ -1402,7 +1408,7 @@ void ARGBShadeRow_MSA(const uint8* src_argb,
}
}
-void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
int x;
v16u8 src0, src1, vec0, vec1, dst0, dst1;
v8u16 reg0;
@@ -1427,7 +1433,7 @@ void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) {
}
}
-void ARGBSepiaRow_MSA(uint8* dst_argb, int width) {
+void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width) {
int x;
v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5;
v8u16 reg0, reg1, reg2;
@@ -1468,8 +1474,8 @@ void ARGBSepiaRow_MSA(uint8* dst_argb, int width) {
}
}
-void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width) {
int x;
v16u8 src0, src1;
@@ -1497,8 +1503,8 @@ void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
}
}
-void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
int width) {
int x;
v8u16 src0, src1;
@@ -1547,7 +1553,9 @@ void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555,
}
}
-void RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width) {
+void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
int x;
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@@ -1592,7 +1600,9 @@ void RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width) {
}
}
-void RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width) {
+void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
int x;
v16u8 src0, src1, src2;
v16u8 vec0, vec1, vec2;
@@ -1617,7 +1627,7 @@ void RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width) {
}
}
-void RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width) {
+void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
int x;
v16u8 src0, src1, src2;
v16u8 vec0, vec1, vec2;
@@ -1642,7 +1652,9 @@ void RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width) {
}
}
-void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width) {
+void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width) {
int x;
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@@ -1699,7 +1711,7 @@ void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width) {
}
}
-void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width) {
+void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
int x;
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@@ -1762,7 +1774,7 @@ void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width) {
}
}
-void RGB24ToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
+void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
v8u16 vec0, vec1, vec2, vec3;
@@ -1803,7 +1815,7 @@ void RGB24ToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
}
}
-void RAWToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
+void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
v8u16 vec0, vec1, vec2, vec3;
@@ -1844,14 +1856,14 @@ void RAWToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
}
}
-void ARGB1555ToUVRow_MSA(const uint8* src_argb1555,
+void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint16* s = (const uint16*)src_argb1555;
- const uint16* t = (const uint16*)(src_argb1555 + src_stride_argb1555);
+ const uint16_t* s = (const uint16_t*)src_argb1555;
+ const uint16_t* t = (const uint16_t*)(src_argb1555 + src_stride_argb1555);
int64_t res0, res1;
v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3;
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6;
@@ -1925,14 +1937,14 @@ void ARGB1555ToUVRow_MSA(const uint8* src_argb1555,
}
}
-void RGB565ToUVRow_MSA(const uint8* src_rgb565,
+void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint16* s = (const uint16*)src_rgb565;
- const uint16* t = (const uint16*)(src_rgb565 + src_stride_rgb565);
+ const uint16_t* s = (const uint16_t*)src_rgb565;
+ const uint16_t* t = (const uint16_t*)(src_rgb565 + src_stride_rgb565);
int64_t res0, res1;
v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3;
v8u16 vec0, vec1, vec2, vec3, vec4, vec5;
@@ -2005,15 +2017,15 @@ void RGB565ToUVRow_MSA(const uint8* src_rgb565,
}
}
-void RGB24ToUVRow_MSA(const uint8* src_rgb0,
+void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint8* s = src_rgb0;
- const uint8* t = src_rgb0 + src_stride_rgb;
- int64 res0, res1;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ int64_t res0, res1;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
@@ -2110,15 +2122,15 @@ void RGB24ToUVRow_MSA(const uint8* src_rgb0,
}
}
-void RAWToUVRow_MSA(const uint8* src_rgb0,
+void RAWToUVRow_MSA(const uint8_t* src_rgb0,
int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint8* s = src_rgb0;
- const uint8* t = src_rgb0 + src_stride_rgb;
- int64 res0, res1;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ int64_t res0, res1;
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
@@ -2215,13 +2227,13 @@ void RAWToUVRow_MSA(const uint8* src_rgb0,
}
}
-void NV12ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_uv,
- uint8* rgb_buf,
+void NV12ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
- uint64 val0, val1;
+ uint64_t val0, val1;
v16u8 src0, src1, res0, res1, dst0, dst1;
v8i16 vec0, vec1, vec2;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
@@ -2245,20 +2257,20 @@ void NV12ToARGBRow_MSA(const uint8* src_y,
res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1);
dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0);
dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0);
- ST_UB2(dst0, dst1, rgb_buf, 16);
+ ST_UB2(dst0, dst1, dst_argb, 16);
src_y += 8;
src_uv += 8;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void NV12ToRGB565Row_MSA(const uint8* src_y,
- const uint8* src_uv,
- uint8* rgb_buf,
+void NV12ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
int x;
- uint64 val0, val1;
+ uint64_t val0, val1;
v16u8 src0, src1, dst0;
v8i16 vec0, vec1, vec2;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
@@ -2281,20 +2293,20 @@ void NV12ToRGB565Row_MSA(const uint8* src_y,
vec1 = (vec1 >> 2) << 5;
vec2 = (vec2 >> 3) << 11;
dst0 = (v16u8)(vec0 | vec1 | vec2);
- ST_UB(dst0, rgb_buf);
+ ST_UB(dst0, dst_rgb565);
src_y += 8;
src_uv += 8;
- rgb_buf += 16;
+ dst_rgb565 += 16;
}
}
-void NV21ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_vu,
- uint8* rgb_buf,
+void NV21ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
- uint64 val0, val1;
+ uint64_t val0, val1;
v16u8 src0, src1, res0, res1, dst0, dst1;
v8i16 vec0, vec1, vec2;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
@@ -2320,16 +2332,16 @@ void NV21ToARGBRow_MSA(const uint8* src_y,
res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1);
dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0);
dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0);
- ST_UB2(dst0, dst1, rgb_buf, 16);
+ ST_UB2(dst0, dst1, dst_argb, 16);
src_y += 8;
src_vu += 8;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void SobelRow_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
int x;
v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3;
@@ -2355,9 +2367,9 @@ void SobelRow_MSA(const uint8* src_sobelx,
}
}
-void SobelToPlaneRow_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -2376,9 +2388,9 @@ void SobelToPlaneRow_MSA(const uint8* src_sobelx,
}
}
-void SobelXYRow_MSA(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
int x;
v16u8 src0, src1, vec0, vec1, vec2;
@@ -2404,7 +2416,7 @@ void SobelXYRow_MSA(const uint8* src_sobelx,
}
}
-void ARGBToYJRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
+void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F);
@@ -2424,7 +2436,7 @@ void ARGBToYJRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
}
}
-void BGRAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
+void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200);
@@ -2444,7 +2456,7 @@ void BGRAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
}
}
-void ABGRToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
+void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142);
@@ -2464,7 +2476,7 @@ void ABGRToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
}
}
-void RGBAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
+void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900);
@@ -2484,14 +2496,14 @@ void RGBAToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
}
}
-void ARGBToUVJRow_MSA(const uint8* src_rgb0,
+void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint8* s = src_rgb0;
- const uint8* t = src_rgb0 + src_stride_rgb;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 vec0, vec1, vec2, vec3;
v16u8 dst0, dst1;
@@ -2554,14 +2566,14 @@ void ARGBToUVJRow_MSA(const uint8* src_rgb0,
}
}
-void BGRAToUVRow_MSA(const uint8* src_rgb0,
+void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint8* s = src_rgb0;
- const uint8* t = src_rgb0 + src_stride_rgb;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
v16u8 dst0, dst1, vec0, vec1, vec2, vec3;
v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
@@ -2587,14 +2599,14 @@ void BGRAToUVRow_MSA(const uint8* src_rgb0,
}
}
-void ABGRToUVRow_MSA(const uint8* src_rgb0,
+void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint8* s = src_rgb0;
- const uint8* t = src_rgb0 + src_stride_rgb;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
v16u8 src0, src1, src2, src3;
v16u8 dst0, dst1;
v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
@@ -2621,14 +2633,14 @@ void ABGRToUVRow_MSA(const uint8* src_rgb0,
}
}
-void RGBAToUVRow_MSA(const uint8* src_rgb0,
+void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
- const uint8* s = src_rgb0;
- const uint8* t = src_rgb0 + src_stride_rgb;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
v16u8 dst0, dst1, vec0, vec1, vec2, vec3;
v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
@@ -2654,10 +2666,10 @@ void RGBAToUVRow_MSA(const uint8* src_rgb0,
}
}
-void I444ToARGBRow_MSA(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
+void I444ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -2714,15 +2726,15 @@ void I444ToARGBRow_MSA(const uint8* src_y,
vec1 = (v8u16)__msa_ilvev_b((v16i8)alpha, (v16i8)vec2);
dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0);
dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0);
- ST_UB2(dst0, dst1, rgb_buf, 16);
+ ST_UB2(dst0, dst1, dst_argb, 16);
src_y += 8;
src_u += 8;
src_v += 8;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void I400ToARGBRow_MSA(const uint8* src_y, uint8* rgb_buf, int width) {
+void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) {
int x;
v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3;
v8i16 vec0, vec1;
@@ -2768,13 +2780,13 @@ void I400ToARGBRow_MSA(const uint8* src_y, uint8* rgb_buf, int width) {
dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1);
dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, (v16i8)res2);
dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2);
- ST_UB4(dst0, dst1, dst2, dst3, rgb_buf, 16);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
src_y += 16;
- rgb_buf += 64;
+ dst_argb += 64;
}
}
-void J400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width) {
+void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) {
int x;
v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3;
v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
@@ -2795,8 +2807,8 @@ void J400ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, int width) {
}
}
-void YUY2ToARGBRow_MSA(const uint8* src_yuy2,
- uint8* rgb_buf,
+void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -2817,14 +2829,14 @@ void YUY2ToARGBRow_MSA(const uint8* src_yuy2,
src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0);
YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
vec0, vec1, vec2);
- STOREARGB(vec0, vec1, vec2, alpha, rgb_buf);
+ STOREARGB(vec0, vec1, vec2, alpha, dst_argb);
src_yuy2 += 16;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void UYVYToARGBRow_MSA(const uint8* src_uyvy,
- uint8* rgb_buf,
+void UYVYToARGBRow_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
int x;
@@ -2845,22 +2857,22 @@ void UYVYToARGBRow_MSA(const uint8* src_uyvy,
src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0);
YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
vec0, vec1, vec2);
- STOREARGB(vec0, vec1, vec2, alpha, rgb_buf);
+ STOREARGB(vec0, vec1, vec2, alpha, dst_argb);
src_uyvy += 16;
- rgb_buf += 32;
+ dst_argb += 32;
}
}
-void InterpolateRow_MSA(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int width,
- int32 source_y_fraction) {
- int32 y1_fraction = source_y_fraction;
- int32 y0_fraction = 256 - y1_fraction;
- uint16 y_fractions;
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
+ int32_t source_y_fraction) {
+ int32_t y1_fraction = source_y_fraction;
+ int32_t y0_fraction = 256 - y1_fraction;
+ uint16_t y_fractions;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
v8u16 vec0, vec1, vec2, vec3, y_frac;
@@ -2886,7 +2898,7 @@ void InterpolateRow_MSA(uint8* dst_ptr,
return;
}
- y_fractions = (uint16)(y0_fraction + (y1_fraction << 8));
+ y_fractions = (uint16_t)(y0_fraction + (y1_fraction << 8));
y_frac = (v8u16)__msa_fill_h(y_fractions);
for (x = 0; x < width; x += 32) {
@@ -2915,7 +2927,7 @@ void InterpolateRow_MSA(uint8* dst_ptr,
}
}
-void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int width) {
+void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width) {
int x;
v4i32 dst0 = __builtin_msa_fill_w(v32);
@@ -2925,7 +2937,7 @@ void ARGBSetRow_MSA(uint8* dst_argb, uint32 v32, int width) {
}
}
-void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width) {
+void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
int x;
v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2;
v16i8 shuffler0 = {2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17};
@@ -2950,9 +2962,9 @@ void RAWToRGB24Row_MSA(const uint8* src_raw, uint8* dst_rgb24, int width) {
}
}
-void MergeUVRow_MSA(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_MSA(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
int x;
v16u8 src0, src1, dst0, dst1;
@@ -2969,7 +2981,9 @@ void MergeUVRow_MSA(const uint8* src_u,
}
}
-void ARGBExtractAlphaRow_MSA(const uint8* src_argb, uint8* dst_a, int width) {
+void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
int i;
v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
@@ -2987,9 +3001,9 @@ void ARGBExtractAlphaRow_MSA(const uint8* src_argb, uint8* dst_a, int width) {
}
}
-void ARGBBlendRow_MSA(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
@@ -3052,7 +3066,7 @@ void ARGBBlendRow_MSA(const uint8* src_argb0,
}
}
-void ARGBQuantizeRow_MSA(uint8* dst_argb,
+void ARGBQuantizeRow_MSA(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
@@ -3158,11 +3172,11 @@ void ARGBQuantizeRow_MSA(uint8* dst_argb,
}
}
-void ARGBColorMatrixRow_MSA(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width) {
- int32 x;
+ int32_t x;
v16i8 src0;
v16u8 src1, src2, dst0, dst1;
v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
@@ -3267,9 +3281,9 @@ void ARGBColorMatrixRow_MSA(const uint8* src_argb,
}
}
-void SplitUVRow_MSA(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
@@ -3291,7 +3305,7 @@ void SplitUVRow_MSA(const uint8* src_uv,
}
}
-void SetRow_MSA(uint8* dst, uint8 v8, int width) {
+void SetRow_MSA(uint8_t* dst, uint8_t v8, int width) {
int x;
v16u8 dst0 = (v16u8)__msa_fill_b(v8);
@@ -3301,9 +3315,9 @@ void SetRow_MSA(uint8* dst, uint8 v8, int width) {
}
}
-void MirrorUVRow_MSA(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
int x;
v16u8 src0, src1, src2, src3;
@@ -3330,11 +3344,11 @@ void MirrorUVRow_MSA(const uint8* src_uv,
}
}
-void SobelXRow_MSA(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
- int32 width) {
+void SobelXRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int32_t width) {
int x;
v16u8 src0, src1, src2, src3, src4, src5, dst0;
v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
@@ -3384,10 +3398,10 @@ void SobelXRow_MSA(const uint8* src_y0,
}
}
-void SobelYRow_MSA(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
- int32 width) {
+void SobelYRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int32_t width) {
int x;
v16u8 src0, src1, dst0;
v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6;
@@ -3429,7 +3443,10 @@ void SobelYRow_MSA(const uint8* src_y0,
}
}
-void HalfFloatRow_MSA(const uint16* src, uint16* dst, float scale, int width) {
+void HalfFloatRow_MSA(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
int i;
v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
diff --git a/chromium/third_party/libyuv/source/row_neon.cc b/chromium/third_party/libyuv/source/row_neon.cc
index 1af828622cd..93a3497d275 100644
--- a/chromium/third_party/libyuv/source/row_neon.cc
+++ b/chromium/third_party/libyuv/source/row_neon.cc
@@ -106,10 +106,10 @@ extern "C" {
"vqshrun.s16 d22, q9, #6 \n" /* R */ \
"vqshrun.s16 d21, q0, #6 \n" /* G */
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -132,10 +132,10 @@ void I444ToARGBRow_NEON(const uint8* src_y,
"q12", "q13", "q14", "q15");
}
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -158,11 +158,11 @@ void I422ToARGBRow_NEON(const uint8* src_y,
"q12", "q13", "q14", "q15");
}
-void I422AlphaToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- const uint8* src_a,
- uint8* dst_argb,
+void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -186,10 +186,10 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y,
"q12", "q13", "q14", "q15");
}
-void I422ToRGBARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGBARow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -213,10 +213,10 @@ void I422ToRGBARow_NEON(const uint8* src_y,
"q12", "q13", "q14", "q15");
}
-void I422ToRGB24Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -245,10 +245,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
"vsri.16 q0, q8, #5 \n" /* RG */ \
"vsri.16 q0, q9, #11 \n" /* RGB */
-void I422ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -279,10 +279,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
"vsri.16 q0, q9, #6 \n" /* ARG */ \
"vsri.16 q0, q10, #11 \n" /* ARGB */
-void I422ToARGB1555Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -315,10 +315,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
"vorr d1, d22, d23 \n" /* RA */ \
"vzip.u8 d0, d1 \n" /* BGRA */
-void I422ToARGB4444Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -344,7 +344,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
"q12", "q13", "q14", "q15");
}
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
+void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
@@ -363,7 +363,7 @@ void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
"q12", "q13", "q14", "q15");
}
-void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
+void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
asm volatile(
"vmov.u8 d23, #255 \n"
"1: \n"
@@ -380,9 +380,9 @@ void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
: "cc", "memory", "d20", "d21", "d22", "d23");
}
-void NV12ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
@@ -403,9 +403,9 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
"q10", "q11", "q12", "q13", "q14", "q15");
}
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
@@ -426,9 +426,9 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
"q10", "q11", "q12", "q13", "q14", "q15");
}
-void NV12ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
+void NV12ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -449,8 +449,8 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
"q12", "q13", "q14", "q15");
}
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
@@ -470,8 +470,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
"q10", "q11", "q12", "q13", "q14", "q15");
}
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
@@ -492,9 +492,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
}
// Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
-void SplitUVRow_NEON(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"1: \n"
@@ -513,9 +513,9 @@ void SplitUVRow_NEON(const uint8* src_uv,
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
-void MergeUVRow_NEON(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_NEON(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
asm volatile(
"1: \n"
@@ -534,10 +534,10 @@ void MergeUVRow_NEON(const uint8* src_u,
}
// Reads 16 packed RGB pixels and writes to planar dst_r, dst_g, dst_b.
-void SplitRGBRow_NEON(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_NEON(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width) {
asm volatile(
"1: \n"
@@ -559,10 +559,10 @@ void SplitRGBRow_NEON(const uint8* src_rgb,
}
// Reads 16 planar R's, G's and B's and writes out 16 packed RGB pixels at a time.
-void MergeRGBRow_NEON(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width) {
asm volatile(
"1: \n"
@@ -584,7 +584,7 @@ void MergeRGBRow_NEON(const uint8* src_r,
}
// Copies a multiple of 32 bytes. vld4.8 allows unaligned access and is fastest on A15.
-void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
+void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
"1: \n"
"vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
@@ -593,14 +593,14 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(count) // %2 // Output registers
+ "+r"(width) // %2 // Output registers
: // Input registers
: "cc", "memory", "q0", "q1" // Clobber List
);
}
-// SetRow writes 'count' bytes using an 8 bit value repeated.
-void SetRow_NEON(uint8* dst, uint8 v8, int count) {
+// SetRow writes 'width' bytes using an 8 bit value repeated.
+void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) {
asm volatile(
"vdup.8 q0, %2 \n" // duplicate 16 bytes
"1: \n"
@@ -608,13 +608,13 @@ void SetRow_NEON(uint8* dst, uint8 v8, int count) {
"vst1.8 {q0}, [%0]! \n" // store
"bgt 1b \n"
: "+r"(dst), // %0
- "+r"(count) // %1
+ "+r"(width) // %1
: "r"(v8) // %2
: "cc", "memory", "q0");
}
-// ARGBSetRow writes 'count' pixels using an 32 bit value repeated.
-void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
+// ARGBSetRow writes 'width' pixels using a 32 bit value repeated.
+void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
asm volatile(
"vdup.u32 q0, %2 \n" // duplicate 4 ints
"1: \n"
@@ -622,12 +622,12 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
"vst1.8 {q0}, [%0]! \n" // store
"bgt 1b \n"
: "+r"(dst), // %0
- "+r"(count) // %1
+ "+r"(width) // %1
: "r"(v32) // %2
: "cc", "memory", "q0");
}
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
// Start at end of source row.
"mov r3, #-16 \n"
@@ -648,9 +648,9 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
: "cc", "memory", "r3", "q0");
}
-void MirrorUVRow_NEON(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
// Start at end of source row.
@@ -673,7 +673,7 @@ void MirrorUVRow_NEON(const uint8* src_uv,
: "cc", "memory", "r12", "q0");
}
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
// Start at end of source row.
"mov r3, #-16 \n"
@@ -694,7 +694,9 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
: "cc", "memory", "r3", "q0");
}
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) {
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
asm volatile(
"vmov.u8 d4, #255 \n" // Alpha
"1: \n"
@@ -710,7 +712,7 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) {
);
}
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) {
+void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
asm volatile(
"vmov.u8 d4, #255 \n" // Alpha
"1: \n"
@@ -727,7 +729,7 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) {
);
}
-void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
+void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
asm volatile(
"1: \n"
"vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
@@ -756,7 +758,9 @@ void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
"vorr.u8 d2, d1, d5 \n" /* R */ \
"vorr.u8 d1, d4, d6 \n" /* G */
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
asm volatile(
"vmov.u8 d3, #255 \n" // Alpha
"1: \n"
@@ -800,8 +804,8 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
"vorr.u8 d2, d1, d5 \n" /* R */ \
"vorr.u8 d1, d4, d6 \n" /* G */
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"vmov.u8 d3, #255 \n" // Alpha
@@ -829,8 +833,8 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
"vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
"vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"vmov.u8 d3, #255 \n" // Alpha
@@ -848,7 +852,9 @@ void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
);
}
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) {
+void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width) {
asm volatile(
"1: \n"
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
@@ -864,7 +870,7 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) {
);
}
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) {
+void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
asm volatile(
"1: \n"
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
@@ -880,7 +886,7 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) {
);
}
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) {
+void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
asm volatile(
"1: \n"
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
@@ -895,7 +901,7 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) {
);
}
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
+void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
asm volatile(
"1: \n"
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
@@ -910,9 +916,9 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
);
}
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"1: \n"
@@ -930,9 +936,9 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
);
}
-void UYVYToUV422Row_NEON(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"1: \n"
@@ -950,10 +956,10 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy,
);
}
-void YUY2ToUVRow_NEON(const uint8* src_yuy2,
+void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"add %1, %0, %1 \n" // stride + src_yuy2
@@ -977,10 +983,10 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2,
);
}
-void UYVYToUVRow_NEON(const uint8* src_uyvy,
+void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"add %1, %0, %1 \n" // stride + src_uyvy
@@ -1005,9 +1011,9 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy,
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
asm volatile(
"vld1.8 {q2}, [%3] \n" // shuffler
@@ -1026,10 +1032,10 @@ void ARGBShuffleRow_NEON(const uint8* src_argb,
);
}
-void I422ToYUY2Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
@@ -1048,10 +1054,10 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
: "cc", "memory", "d0", "d1", "d2", "d3");
}
-void I422ToUYVYRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToUYVYRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width) {
asm volatile(
"1: \n"
@@ -1070,7 +1076,9 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
: "cc", "memory", "d0", "d1", "d2", "d3");
}
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
+void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb565,
+ int width) {
asm volatile(
"1: \n"
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
@@ -1085,9 +1093,9 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
: "cc", "memory", "q0", "q8", "q9", "q10", "q11");
}
-void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width) {
asm volatile(
"vdup.32 d2, %2 \n" // dither4
@@ -1107,8 +1115,8 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
: "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11");
}
-void ARGBToARGB1555Row_NEON(const uint8* src_argb,
- uint8* dst_argb1555,
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb1555,
int width) {
asm volatile(
"1: \n"
@@ -1124,8 +1132,8 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb,
: "cc", "memory", "q0", "q8", "q9", "q10", "q11");
}
-void ARGBToARGB4444Row_NEON(const uint8* src_argb,
- uint8* dst_argb4444,
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb4444,
int width) {
asm volatile(
"vmov.u8 d4, #0x0f \n" // bits to clear with
@@ -1143,7 +1151,7 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb,
: "cc", "memory", "q0", "q8", "q9", "q10", "q11");
}
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
+void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
@@ -1166,7 +1174,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
}
-void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
asm volatile(
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels
@@ -1182,7 +1192,7 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
);
}
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
"vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
@@ -1204,9 +1214,9 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
}
// 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"vmov.u8 d24, #112 \n" // UB / VR 0.875
@@ -1260,10 +1270,10 @@ void ARGBToUV444Row_NEON(const uint8* src_argb,
// clang-format on
// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
-void ARGBToUVRow_NEON(const uint8* src_argb,
+void ARGBToUVRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
@@ -1306,10 +1316,10 @@ void ARGBToUVRow_NEON(const uint8* src_argb,
}
// TODO(fbarchard): Make subsampling match the C code.
-void ARGBToUVJRow_NEON(const uint8* src_argb,
+void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
@@ -1351,10 +1361,10 @@ void ARGBToUVJRow_NEON(const uint8* src_argb,
);
}
-void BGRAToUVRow_NEON(const uint8* src_bgra,
+void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_bgra
@@ -1396,10 +1406,10 @@ void BGRAToUVRow_NEON(const uint8* src_bgra,
);
}
-void ABGRToUVRow_NEON(const uint8* src_abgr,
+void ABGRToUVRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_abgr
@@ -1441,10 +1451,10 @@ void ABGRToUVRow_NEON(const uint8* src_abgr,
);
}
-void RGBAToUVRow_NEON(const uint8* src_rgba,
+void RGBAToUVRow_NEON(const uint8_t* src_rgba,
int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_rgba
@@ -1486,10 +1496,10 @@ void RGBAToUVRow_NEON(const uint8* src_rgba,
);
}
-void RGB24ToUVRow_NEON(const uint8* src_rgb24,
+void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_rgb24
@@ -1531,10 +1541,10 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24,
);
}
-void RAWToUVRow_NEON(const uint8* src_raw,
+void RAWToUVRow_NEON(const uint8_t* src_raw,
int src_stride_raw,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_raw
@@ -1577,10 +1587,10 @@ void RAWToUVRow_NEON(const uint8* src_raw,
}
// 16x2 pixels -> 8x1. width is the number of ARGB pixels, e.g. 16.
-void RGB565ToUVRow_NEON(const uint8* src_rgb565,
+void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"add %1, %0, %1 \n" // src_stride + src_argb
@@ -1643,10 +1653,10 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565,
}
// 16x2 pixels -> 8x1. width is the number of ARGB pixels, e.g. 16.
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555,
+void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"add %1, %0, %1 \n" // src_stride + src_argb
@@ -1709,10 +1719,10 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555,
}
// 16x2 pixels -> 8x1. width is the number of ARGB pixels, e.g. 16.
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
+void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
int src_stride_argb4444,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"add %1, %0, %1 \n" // src_stride + src_argb
@@ -1774,7 +1784,7 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
"q9", "q10", "q11", "q12", "q13", "q14", "q15");
}
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) {
+void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
@@ -1798,7 +1808,9 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) {
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) {
+void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width) {
asm volatile(
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
@@ -1822,7 +1834,9 @@ void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) {
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) {
+void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_y,
+ int width) {
asm volatile(
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
@@ -1846,7 +1860,7 @@ void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) {
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) {
+void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
@@ -1869,7 +1883,7 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) {
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
}
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) {
+void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
@@ -1892,7 +1906,7 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) {
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
}
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) {
+void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
@@ -1915,7 +1929,7 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) {
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
}
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) {
+void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
@@ -1938,7 +1952,7 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) {
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
}
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
+void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
@@ -1962,8 +1976,8 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
}
// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
@@ -2021,9 +2035,9 @@ void InterpolateRow_NEON(uint8* dst_ptr,
}
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
-void ARGBBlendRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"subs %3, #8 \n"
@@ -2081,7 +2095,9 @@ void ARGBBlendRow_NEON(const uint8* src_argb0,
}
// Attenuate 8 pixels at a time.
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
asm volatile(
// Attenuate 8 pixels.
"1: \n"
@@ -2104,7 +2120,7 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
// Quantize 8 ARGB pixels (32 bytes).
// dst = (dst * scale >> 16) * interval_size + interval_offset;
-void ARGBQuantizeRow_NEON(uint8* dst_argb,
+void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
@@ -2147,10 +2163,10 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb,
// Shade 8 pixels at a time by a specified value.
// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from 0 to 8.
// Rounding in vqrdmulh adds +1 to the high half if the high bit of the low s16 is set.
-void ARGBShadeRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBShadeRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value) {
+ uint32_t value) {
asm volatile(
"vdup.u32 q0, %3 \n" // duplicate scale value.
"vzip.u8 d0, d1 \n" // d0 aarrggbb.
@@ -2184,7 +2200,7 @@ void ARGBShadeRow_NEON(const uint8* src_argb,
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels.
// Similar to ARGBToYJ but stores ARGB.
// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
asm volatile(
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
"vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
@@ -2211,7 +2227,7 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
// b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
+void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
asm volatile(
"vmov.u8 d20, #17 \n" // BB coefficient
"vmov.u8 d21, #68 \n" // BG coefficient
@@ -2249,9 +2265,9 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
// Transform 8 ARGB pixels (32 bytes) with a color matrix.
// TODO(fbarchard): Was the same as Sepia except the matrix is provided. This
// function needs to saturate. Consider doing a non-saturating version.
-void ARGBColorMatrixRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width) {
asm volatile(
"vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors.
@@ -2308,9 +2324,9 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb,
}
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBMultiplyRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
// 8 pixel loop.
@@ -2337,9 +2353,9 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0,
}
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBAddRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
// 8 pixel loop.
@@ -2360,9 +2376,9 @@ void ARGBAddRow_NEON(const uint8* src_argb0,
}
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-void ARGBSubtractRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
// 8 pixel loop.
@@ -2387,9 +2403,9 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0,
// R = Sobel
// G = Sobel
// B = Sobel
-void SobelRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"vmov.u8 d3, #255 \n" // alpha
@@ -2412,9 +2428,9 @@ void SobelRow_NEON(const uint8* src_sobelx,
}
// Adds Sobel X and Sobel Y and stores Sobel into plane.
-void SobelToPlaneRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width) {
asm volatile(
// 16 pixel loop.
@@ -2438,9 +2454,9 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx,
// R = Sobel X
// G = Sobel
// B = Sobel Y
-void SobelXYRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"vmov.u8 d3, #255 \n" // alpha
@@ -2464,10 +2480,10 @@ void SobelXYRow_NEON(const uint8* src_sobelx,
// -1 0 1
// -2 0 2
// -1 0 1
-void SobelXRow_NEON(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width) {
asm volatile(
"1: \n"
@@ -2503,9 +2519,9 @@ void SobelXRow_NEON(const uint8* src_y0,
// -1 -2 -1
// 0 0 0
// 1 2 1
-void SobelYRow_NEON(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width) {
asm volatile(
"1: \n"
@@ -2536,7 +2552,10 @@ void SobelYRow_NEON(const uint8* src_y0,
);
}
-void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
+void HalfFloat1Row_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float /*unused*/,
+ int width) {
asm volatile(
"vdup.32 q0, %3 \n"
@@ -2561,7 +2580,10 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
}
// TODO(fbarchard): multiply by element.
-void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
+void HalfFloatRow_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
asm volatile(
"vdup.32 q0, %3 \n"
diff --git a/chromium/third_party/libyuv/source/row_neon64.cc b/chromium/third_party/libyuv/source/row_neon64.cc
index 5616d8a5b5f..e7b8b5c1dd9 100644
--- a/chromium/third_party/libyuv/source/row_neon64.cc
+++ b/chromium/third_party/libyuv/source/row_neon64.cc
@@ -112,10 +112,10 @@ extern "C" {
".8h, #6 \n" /* G */ \
"sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I444ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -141,10 +141,10 @@ void I444ToARGBRow_NEON(const uint8* src_y,
);
}
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -170,11 +170,11 @@ void I422ToARGBRow_NEON(const uint8* src_y,
);
}
-void I422AlphaToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- const uint8* src_a,
- uint8* dst_argb,
+void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -201,10 +201,10 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y,
);
}
-void I422ToRGBARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
+void I422ToRGBARow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -230,10 +230,10 @@ void I422ToRGBARow_NEON(const uint8* src_y,
);
}
-void I422ToRGB24Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
+void I422ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -265,10 +265,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
"sri v0.8h, v21.8h, #5 \n" /* RG */ \
"sri v0.8h, v20.8h, #11 \n" /* RGB */
-void I422ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
+void I422ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -301,10 +301,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
"sri v0.8h, v21.8h, #6 \n" /* ARG */ \
"sri v0.8h, v20.8h, #11 \n" /* ARGB */
-void I422ToARGB1555Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
+void I422ToARGB1555Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -339,10 +339,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
"orr v1.8b, v22.8b, v23.8b \n" /* RA */ \
"zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */
-void I422ToARGB4444Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
+void I422ToARGB4444Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -370,7 +370,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
);
}
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
+void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
asm volatile (
YUVTORGB_SETUP
"movi v23.8b, #255 \n"
@@ -392,7 +392,7 @@ void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
);
}
-void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
+void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
asm volatile(
"movi v23.8b, #255 \n"
"1: \n"
@@ -409,9 +409,9 @@ void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
: "cc", "memory", "v20", "v21", "v22", "v23");
}
-void NV12ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+void NV12ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -436,9 +436,9 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
);
}
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+void NV21ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -463,9 +463,9 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
);
}
-void NV12ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
+void NV12ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
@@ -488,8 +488,8 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30");
}
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
+void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -513,8 +513,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
);
}
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
+void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
@@ -539,9 +539,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
}
// Reads 16 pairs of UV and writes even values to dst_u and odd values to dst_v.
-void SplitUVRow_NEON(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void SplitUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"1: \n"
@@ -560,9 +560,9 @@ void SplitUVRow_NEON(const uint8* src_uv,
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
-void MergeUVRow_NEON(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+void MergeUVRow_NEON(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
asm volatile(
"1: \n"
@@ -581,10 +581,10 @@ void MergeUVRow_NEON(const uint8* src_u,
}
// Reads 16 packed RGB and writes to planar dst_r, dst_g, dst_b.
-void SplitRGBRow_NEON(const uint8* src_rgb,
- uint8* dst_r,
- uint8* dst_g,
- uint8* dst_b,
+void SplitRGBRow_NEON(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
int width) {
asm volatile(
"1: \n"
@@ -605,10 +605,10 @@ void SplitRGBRow_NEON(const uint8* src_rgb,
}
// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
-void MergeRGBRow_NEON(const uint8* src_r,
- const uint8* src_g,
- const uint8* src_b,
- uint8* dst_rgb,
+void MergeRGBRow_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
int width) {
asm volatile(
"1: \n"
@@ -629,7 +629,7 @@ void MergeRGBRow_NEON(const uint8* src_r,
}
// Copy multiple of 32.
-void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
+void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
"1: \n"
"ldp q0, q1, [%0], #32 \n"
@@ -638,14 +638,14 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
"b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
- "+r"(count) // %2 // Output registers
+ "+r"(width) // %2 // Output registers
: // Input registers
: "cc", "memory", "v0", "v1" // Clobber List
);
}
-// SetRow writes 'count' bytes using an 8 bit value repeated.
-void SetRow_NEON(uint8* dst, uint8 v8, int count) {
+// SetRow writes 'width' bytes using an 8 bit value repeated.
+void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) {
asm volatile(
"dup v0.16b, %w2 \n" // duplicate 16 bytes
"1: \n"
@@ -653,12 +653,12 @@ void SetRow_NEON(uint8* dst, uint8 v8, int count) {
"st1 {v0.16b}, [%0], #16 \n" // store
"b.gt 1b \n"
: "+r"(dst), // %0
- "+r"(count) // %1
+ "+r"(width) // %1
: "r"(v8) // %2
: "cc", "memory", "v0");
}
-void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
+void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
asm volatile(
"dup v0.4s, %w2 \n" // duplicate 4 ints
"1: \n"
@@ -666,12 +666,12 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
"st1 {v0.16b}, [%0], #16 \n" // store
"b.gt 1b \n"
: "+r"(dst), // %0
- "+r"(count) // %1
+ "+r"(width) // %1
: "r"(v32) // %2
: "cc", "memory", "v0");
}
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
// Start at end of source row.
"add %0, %0, %w2, sxtw \n"
@@ -690,9 +690,9 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
: "cc", "memory", "v0");
}
-void MirrorUVRow_NEON(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+void MirrorUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
// Start at end of source row.
@@ -714,7 +714,7 @@ void MirrorUVRow_NEON(const uint8* src_uv,
: "cc", "memory", "v0", "v1");
}
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
// Start at end of source row.
"add %0, %0, %w2, sxtw #2 \n"
@@ -733,7 +733,9 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
: "cc", "memory", "v0");
}
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) {
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
asm volatile(
"movi v4.8b, #255 \n" // Alpha
"1: \n"
@@ -749,7 +751,7 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) {
);
}
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) {
+void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
asm volatile(
"movi v5.8b, #255 \n" // Alpha
"1: \n"
@@ -767,7 +769,7 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) {
);
}
-void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
+void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
asm volatile(
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
@@ -797,7 +799,9 @@ void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
"orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
"dup v2.2D, v0.D[1] \n" /* R */
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
asm volatile(
"movi v3.8b, #255 \n" // Alpha
"1: \n"
@@ -851,8 +855,8 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
"orr v2.16b, v1.16b, v3.16b \n" /* R */ \
"dup v1.2D, v0.D[1] \n" /* G */
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
- uint8* dst_argb,
+void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"movi v3.8b, #255 \n" // Alpha
@@ -883,8 +887,8 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
"dup v0.2D, v2.D[1] \n" \
"dup v1.2D, v3.D[1] \n"
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
- uint8* dst_argb,
+void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"1: \n"
@@ -902,7 +906,9 @@ void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
);
}
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) {
+void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width) {
asm volatile(
"1: \n"
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB
@@ -918,7 +924,7 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) {
);
}
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) {
+void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
asm volatile(
"1: \n"
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a
@@ -935,7 +941,7 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) {
);
}
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) {
+void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
asm volatile(
"1: \n"
"ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
@@ -950,7 +956,7 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) {
);
}
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
+void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
asm volatile(
"1: \n"
"ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
@@ -965,9 +971,9 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
);
}
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"1: \n"
@@ -985,9 +991,9 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
);
}
-void UYVYToUV422Row_NEON(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"1: \n"
@@ -1005,12 +1011,12 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy,
);
}
-void YUY2ToUVRow_NEON(const uint8* src_yuy2,
+void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_yuy2b = src_yuy2 + stride_yuy2;
+ const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2;
asm volatile(
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
@@ -1032,12 +1038,12 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2,
);
}
-void UYVYToUVRow_NEON(const uint8* src_uyvy,
+void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_uyvyb = src_uyvy + stride_uyvy;
+ const uint8_t* src_uyvyb = src_uyvy + stride_uyvy;
asm volatile(
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
@@ -1060,9 +1066,9 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy,
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+void ARGBShuffleRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
asm volatile(
"ld1 {v2.16b}, [%3] \n" // shuffler
@@ -1080,10 +1086,10 @@ void ARGBShuffleRow_NEON(const uint8* src_argb,
);
}
-void I422ToYUY2Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2,
+void I422ToYUY2Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
@@ -1103,10 +1109,10 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
: "cc", "memory", "v0", "v1", "v2", "v3");
}
-void I422ToUYVYRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy,
+void I422ToUYVYRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
int width) {
asm volatile(
"1: \n"
@@ -1126,7 +1132,9 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
: "cc", "memory", "v0", "v1", "v2", "v3");
}
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
+void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb565,
+ int width) {
asm volatile(
"1: \n"
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
@@ -1141,9 +1149,9 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
: "cc", "memory", "v0", "v20", "v21", "v22", "v23");
}
-void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width) {
asm volatile(
"dup v1.4s, %w2 \n" // dither4
@@ -1162,8 +1170,8 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
: "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23");
}
-void ARGBToARGB1555Row_NEON(const uint8* src_argb,
- uint8* dst_argb1555,
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb1555,
int width) {
asm volatile(
"1: \n"
@@ -1180,8 +1188,8 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb,
: "cc", "memory", "v0", "v20", "v21", "v22", "v23");
}
-void ARGBToARGB4444Row_NEON(const uint8* src_argb,
- uint8* dst_argb4444,
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb4444,
int width) {
asm volatile(
"movi v4.16b, #0x0f \n" // bits to clear with
@@ -1200,7 +1208,7 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb,
: "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23");
}
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
+void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
@@ -1223,7 +1231,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}
-void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
asm volatile(
"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16
@@ -1239,7 +1249,7 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
);
}
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #15 \n" // B * 0.11400 coefficient
"movi v5.8b, #75 \n" // G * 0.58700 coefficient
@@ -1261,9 +1271,9 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
}
// 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8* src_argb,
- uint8* dst_u,
- uint8* dst_v,
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
asm volatile(
"movi v24.8b, #112 \n" // UB / VR 0.875
@@ -1328,12 +1338,12 @@ void ARGBToUV444Row_NEON(const uint8* src_argb,
// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
// TODO(fbarchard): consider ptrdiff_t for all strides.
-void ARGBToUVRow_NEON(const uint8* src_argb,
+void ARGBToUVRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_argb_1 = src_argb + src_stride_argb;
+ const uint8_t* src_argb_1 = src_argb + src_stride_argb;
asm volatile (
RGBTOUV_SETUP_REG
"1: \n"
@@ -1368,12 +1378,12 @@ void ARGBToUVRow_NEON(const uint8* src_argb,
}
// TODO(fbarchard): Subsample match C code.
-void ARGBToUVJRow_NEON(const uint8* src_argb,
+void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_argb_1 = src_argb + src_stride_argb;
+ const uint8_t* src_argb_1 = src_argb + src_stride_argb;
asm volatile (
"movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
"movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
@@ -1411,12 +1421,12 @@ void ARGBToUVJRow_NEON(const uint8* src_argb,
);
}
-void BGRAToUVRow_NEON(const uint8* src_bgra,
+void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_bgra_1 = src_bgra + src_stride_bgra;
+ const uint8_t* src_bgra_1 = src_bgra + src_stride_bgra;
asm volatile (
RGBTOUV_SETUP_REG
"1: \n"
@@ -1449,12 +1459,12 @@ void BGRAToUVRow_NEON(const uint8* src_bgra,
);
}
-void ABGRToUVRow_NEON(const uint8* src_abgr,
+void ABGRToUVRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_abgr_1 = src_abgr + src_stride_abgr;
+ const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
asm volatile (
RGBTOUV_SETUP_REG
"1: \n"
@@ -1487,12 +1497,12 @@ void ABGRToUVRow_NEON(const uint8* src_abgr,
);
}
-void RGBAToUVRow_NEON(const uint8* src_rgba,
+void RGBAToUVRow_NEON(const uint8_t* src_rgba,
int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_rgba_1 = src_rgba + src_stride_rgba;
+ const uint8_t* src_rgba_1 = src_rgba + src_stride_rgba;
asm volatile (
RGBTOUV_SETUP_REG
"1: \n"
@@ -1525,12 +1535,12 @@ void RGBAToUVRow_NEON(const uint8* src_rgba,
);
}
-void RGB24ToUVRow_NEON(const uint8* src_rgb24,
+void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
+ const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
asm volatile (
RGBTOUV_SETUP_REG
"1: \n"
@@ -1563,12 +1573,12 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24,
);
}
-void RAWToUVRow_NEON(const uint8* src_raw,
+void RAWToUVRow_NEON(const uint8_t* src_raw,
int src_stride_raw,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_raw_1 = src_raw + src_stride_raw;
+ const uint8_t* src_raw_1 = src_raw + src_stride_raw;
asm volatile (
RGBTOUV_SETUP_REG
"1: \n"
@@ -1602,12 +1612,12 @@ void RAWToUVRow_NEON(const uint8* src_raw,
}
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
-void RGB565ToUVRow_NEON(const uint8* src_rgb565,
+void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
int src_stride_rgb565,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565;
+ const uint8_t* src_rgb565_1 = src_rgb565 + src_stride_rgb565;
asm volatile(
"movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) /
// 2
@@ -1673,12 +1683,12 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565,
}
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555,
+void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
int src_stride_argb1555,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555;
+ const uint8_t* src_argb1555_1 = src_argb1555 + src_stride_argb1555;
asm volatile(
RGBTOUV_SETUP_REG
"1: \n"
@@ -1738,12 +1748,12 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555,
}
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
+void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
int src_stride_argb4444,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
- const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444;
+ const uint8_t* src_argb4444_1 = src_argb4444 + src_stride_argb4444;
asm volatile(
RGBTOUV_SETUP_REG
"1: \n"
@@ -1804,7 +1814,7 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
);
}
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) {
+void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
asm volatile(
"movi v24.8b, #13 \n" // B * 0.1016 coefficient
"movi v25.8b, #65 \n" // G * 0.5078 coefficient
@@ -1829,7 +1839,9 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) {
"v27");
}
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) {
+void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width) {
asm volatile(
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
@@ -1853,7 +1865,9 @@ void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) {
+void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_y,
+ int width) {
asm volatile(
"movi v24.8b, #13 \n" // B * 0.1016 coefficient
"movi v25.8b, #65 \n" // G * 0.5078 coefficient
@@ -1877,7 +1891,7 @@ void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27");
}
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) {
+void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #33 \n" // R * 0.2578 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
@@ -1900,7 +1914,7 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
}
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) {
+void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #33 \n" // R * 0.2578 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
@@ -1923,7 +1937,7 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
}
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) {
+void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
@@ -1946,7 +1960,7 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
}
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) {
+void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
@@ -1969,7 +1983,7 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
}
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
+void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #33 \n" // R * 0.2578 coefficient
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
@@ -1993,14 +2007,14 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
}
// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void InterpolateRow_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
- const uint8* src_ptr1 = src_ptr + src_stride;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
asm volatile(
"cmp %w4, #0 \n"
"b.eq 100f \n"
@@ -2053,9 +2067,9 @@ void InterpolateRow_NEON(uint8* dst_ptr,
}
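A scalar sketch of the blend this function performs, with the two row weights summing to 256 (an illustrative reference, not the shipped code; the asm also special-cases a fraction of 0 as a plain copy):

  #include <stddef.h>
  #include <stdint.h>
  static void InterpolateRowSketch(uint8_t* dst, const uint8_t* src,
                                   ptrdiff_t src_stride, int width,
                                   int source_y_fraction) {
    const uint8_t* src1 = src + src_stride;
    int y1 = source_y_fraction;  // weight of the second row
    int y0 = 256 - y1;           // weight of the first row
    for (int i = 0; i < width; ++i) {
      dst[i] = (uint8_t)((src[i] * y0 + src1[i] * y1) >> 8);
    }
  }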
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
-void ARGBBlendRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"subs %w3, %w3, #8 \n"
@@ -2121,7 +2135,9 @@ void ARGBBlendRow_NEON(const uint8* src_argb0,
}
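A scalar sketch of the per-channel formula quoted in the comment above; src_argb0 is assumed premultiplied, and the SIMD path forces the result alpha opaque:

  #include <stdint.h>
  // f = foreground channel (premultiplied), b = background channel,
  // a = foreground alpha.
  static inline uint8_t BlendChannel(uint8_t f, uint8_t b, uint8_t a) {
    return (uint8_t)((((256 - a) * b) >> 8) + f);
  }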
// Attenuate 8 pixels at a time.
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
asm volatile(
// Attenuate 8 pixels.
"1: \n"
@@ -2145,7 +2161,7 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
// Quantize 8 ARGB pixels (32 bytes).
// dst = (dst * scale >> 16) * interval_size + interval_offset;
-void ARGBQuantizeRow_NEON(uint8* dst_argb,
+void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
@@ -2188,10 +2204,10 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb,
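A scalar sketch of the formula in the comment above; the assumption here is that callers precompute scale as 65536 / interval_size:

  #include <stdint.h>
  static inline uint8_t Quantize(uint8_t v, int scale, int interval_size,
                                 int interval_offset) {
    return (uint8_t)((v * scale >> 16) * interval_size + interval_offset);
  }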
// Shade 8 pixels at a time by specified value.
// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from 0 to 8.
// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
-void ARGBShadeRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
+void ARGBShadeRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value) {
+ uint32_t value) {
asm volatile(
"dup v0.4s, %w3 \n" // duplicate scale value.
"zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb.
@@ -2225,7 +2241,7 @@ void ARGBShadeRow_NEON(const uint8* src_argb,
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
// Similar to ARGBToYJ but stores ARGB.
// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
asm volatile(
"movi v24.8b, #15 \n" // B * 0.11400 coefficient
"movi v25.8b, #75 \n" // G * 0.58700 coefficient
@@ -2253,7 +2269,7 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
+void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
asm volatile(
"movi v20.8b, #17 \n" // BB coefficient
"movi v21.8b, #68 \n" // BG coefficient
@@ -2291,9 +2307,9 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
// Transform 8 ARGB pixels (32 bytes) with color matrix.
// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
// needs to saturate. Consider doing a non-saturating version.
-void ARGBColorMatrixRow_NEON(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width) {
asm volatile(
"ld1 {v2.16b}, [%3] \n" // load 3 ARGB vectors.
@@ -2351,9 +2367,9 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb,
// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBMultiplyRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
// 8 pixel loop.
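The intent is to treat each channel as a 0..1 fraction, i.e. dst = s0 * s1 / 255 per channel; the SIMD path approximates the divide with a shift. A scalar sketch of the exact form:

  #include <stdint.h>
  static inline uint8_t MulChannel(uint8_t s0, uint8_t s1) {
    return (uint8_t)((s0 * s1) / 255);
  }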
@@ -2380,9 +2396,9 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0,
}
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBAddRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBAddRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
// 8 pixel loop.
@@ -2405,9 +2421,9 @@ void ARGBAddRow_NEON(const uint8* src_argb0,
}
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-void ARGBSubtractRow_NEON(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
asm volatile(
// 8 pixel loop.
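Add and Subtract are plain per-byte saturating operations (the A64 loops get the same effect with uqadd/uqsub). A scalar sketch of both:

  #include <stdint.h>
  static inline uint8_t AddSat(uint8_t a, uint8_t b) {
    int v = a + b;
    return (uint8_t)(v > 255 ? 255 : v);
  }
  static inline uint8_t SubSat(uint8_t a, uint8_t b) {
    int v = a - b;
    return (uint8_t)(v < 0 ? 0 : v);
  }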
@@ -2434,9 +2450,9 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0,
// R = Sobel
// G = Sobel
// B = Sobel
-void SobelRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"movi v3.8b, #255 \n" // alpha
@@ -2459,9 +2475,9 @@ void SobelRow_NEON(const uint8* src_sobelx,
}
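The combined Sobel value is the saturating sum of the X and Y magnitudes; SobelRow replicates it into B, G and R with opaque alpha, SobelToPlaneRow stores it as a single plane, and SobelXYRow keeps X and Y in separate channels. A scalar sketch:

  #include <stdint.h>
  static inline uint8_t SobelCombine(uint8_t sobelx, uint8_t sobely) {
    int s = sobelx + sobely;
    return (uint8_t)(s > 255 ? 255 : s);
  }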
// Adds Sobel X and Sobel Y and stores Sobel into plane.
-void SobelToPlaneRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+void SobelToPlaneRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width) {
asm volatile(
// 16 pixel loop.
@@ -2485,9 +2501,9 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx,
// R = Sobel X
// G = Sobel
// B = Sobel Y
-void SobelXYRow_NEON(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+void SobelXYRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
asm volatile(
"movi v3.8b, #255 \n" // alpha
@@ -2511,10 +2527,10 @@ void SobelXYRow_NEON(const uint8* src_sobelx,
// -1 0 1
// -2 0 2
// -1 0 1
-void SobelXRow_NEON(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+void SobelXRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width) {
asm volatile(
"1: \n"
@@ -2550,9 +2566,9 @@ void SobelXRow_NEON(const uint8* src_y0,
// -1 -2 -1
// 0 0 0
// 1 2 1
-void SobelYRow_NEON(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+void SobelYRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width) {
asm volatile(
"1: \n"
@@ -2584,7 +2600,10 @@ void SobelYRow_NEON(const uint8* src_y0,
}
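A scalar sketch of the two 3x3 kernels drawn above. Both store the saturated absolute value; the Y kernel's middle row is all zeros, which is why SobelYRow only takes two source row pointers:

  #include <stdint.h>
  #include <stdlib.h>  // abs
  static uint8_t SobelXAt(const uint8_t* y0, const uint8_t* y1,
                          const uint8_t* y2, int i) {
    int s = (y0[i] - y0[i + 2]) + 2 * (y1[i] - y1[i + 2]) + (y2[i] - y2[i + 2]);
    s = abs(s);
    return (uint8_t)(s > 255 ? 255 : s);
  }
  static uint8_t SobelYAt(const uint8_t* row0, const uint8_t* row1, int i) {
    int s = (row0[i] - row1[i]) + 2 * (row0[i + 1] - row1[i + 1]) +
            (row0[i + 2] - row1[i + 2]);
    s = abs(s);
    return (uint8_t)(s > 255 ? 255 : s);
  }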
// Caveat - rounds float to half float whereas scaling version truncates.
-void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
+void HalfFloat1Row_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float /*unused*/,
+ int width) {
asm volatile(
"1: \n"
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
@@ -2604,7 +2623,10 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
: "cc", "memory", "v1", "v2", "v3");
}
-void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
+void HalfFloatRow_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
asm volatile(
"1: \n"
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
@@ -2702,12 +2724,12 @@ void ScaleSamples_NEON(const float* src, float* dst, float scale, int width) {
}
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
-void GaussCol_NEON(const uint16* src0,
- const uint16* src1,
- const uint16* src2,
- const uint16* src3,
- const uint16* src4,
- uint32* dst,
+void GaussCol_NEON(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
int width) {
asm volatile(
"movi v6.8h, #4 \n" // constant 4
@@ -2742,10 +2764,10 @@ void GaussCol_NEON(const uint16* src0,
}
// filter 5 adjacent pixels with 1, 4, 6, 4, 1 coefficients to produce 1 pixel.
-void GaussRow_NEON(const uint32* src, uint16* dst, int width) {
- const uint32* src1 = src + 1;
- const uint32* src2 = src + 2;
- const uint32* src3 = src + 3;
+void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
+ const uint32_t* src1 = src + 1;
+ const uint32_t* src2 = src + 2;
+ const uint32_t* src3 = src + 3;
asm volatile(
"movi v6.4s, #4 \n" // constant 4
"movi v7.4s, #6 \n" // constant 6
diff --git a/chromium/third_party/libyuv/source/row_win.cc b/chromium/third_party/libyuv/source/row_win.cc
index 596d7df739e..5500d7f5a64 100644
--- a/chromium/third_party/libyuv/source/row_win.cc
+++ b/chromium/third_party/libyuv/source/row_win.cc
@@ -28,27 +28,27 @@ extern "C" {
#if defined(_M_X64)
// Read 4 UV from 422, upsample to 8 UV.
-#define READYUV422 \
- xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
- xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
- u_buf += 4; \
- xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
+#define READYUV422 \
+ xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \
+ xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
+ u_buf += 4; \
+ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
+ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
y_buf += 8;
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
-#define READYUVA422 \
- xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
- xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
- u_buf += 4; \
- xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
- y_buf += 8; \
- xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \
+#define READYUVA422 \
+ xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \
+ xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
+ u_buf += 4; \
+ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
+ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
+ y_buf += 8; \
+ xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \
a_buf += 8;
// Convert 8 pixels: 8 UV and 8 Y.
@@ -84,15 +84,15 @@ extern "C" {
dst_argb += 32;
#if defined(HAS_I422TOARGBROW_SSSE3)
-void I422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__m128i xmm0, xmm1, xmm2, xmm4;
const __m128i xmm5 = _mm_set1_epi8(-1);
- const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
+ const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf;
while (width > 0) {
READYUV422
YUVTORGB(yuvconstants)
@@ -103,15 +103,15 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
#endif
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
-void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__m128i xmm0, xmm1, xmm2, xmm4, xmm5;
- const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
+ const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf;
while (width > 0) {
READYUVA422
YUVTORGB(yuvconstants)
@@ -255,8 +255,8 @@ static const lvec8 kShuffleNV21 = {
};
// Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) void J400ToARGBRow_SSE2(const uint8* src_y,
- uint8* dst_argb,
+__declspec(naked) void J400ToARGBRow_SSE2(const uint8_t* src_y,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_y
@@ -285,8 +285,8 @@ __declspec(naked) void J400ToARGBRow_SSE2(const uint8* src_y,
#ifdef HAS_J400TOARGBROW_AVX2
// Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) void J400ToARGBRow_AVX2(const uint8* src_y,
- uint8* dst_argb,
+__declspec(naked) void J400ToARGBRow_AVX2(const uint8_t* src_y,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_y
@@ -316,8 +316,8 @@ __declspec(naked) void J400ToARGBRow_AVX2(const uint8* src_y,
}
#endif // HAS_J400TOARGBROW_AVX2
-__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24,
- uint8* dst_argb,
+__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_rgb24
@@ -355,8 +355,8 @@ __declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24,
}
}
-__declspec(naked) void RAWToARGBRow_SSSE3(const uint8* src_raw,
- uint8* dst_argb,
+__declspec(naked) void RAWToARGBRow_SSSE3(const uint8_t* src_raw,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_raw
@@ -394,8 +394,8 @@ __declspec(naked) void RAWToARGBRow_SSSE3(const uint8* src_raw,
}
}
-__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8* src_raw,
- uint8* dst_rgb24,
+__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8_t* src_raw,
+ uint8_t* dst_rgb24,
int width) {
__asm {
mov eax, [esp + 4] // src_raw
@@ -430,8 +430,8 @@ __declspec(naked) void RAWToRGB24Row_SSSE3(const uint8* src_raw,
// v * (256 + 8)
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
// 20 instructions.
-__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565,
- uint8* dst_argb,
+__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, 0x01080108 // generate multiplier to repeat 5 bits
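The 0x0108 multiplier is the usual bit-replication trick for widening a 5-bit channel: (c5 * 0x0108) >> 5 equals (c5 << 3) | (c5 >> 2). A scalar sketch for one RGB565 pixel, with the analogous 6-bit replication for green:

  #include <stdint.h>
  static void Expand565(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r) {
    uint8_t b5 = p & 0x1f, g6 = (p >> 5) & 0x3f, r5 = p >> 11;
    *b = (uint8_t)((b5 << 3) | (b5 >> 2));
    *g = (uint8_t)((g6 << 2) | (g6 >> 4));
    *r = (uint8_t)((r5 << 3) | (r5 >> 2));
  }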
@@ -486,8 +486,8 @@ __declspec(naked) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565,
// v * 256 + v * 8
// v * (256 + 8)
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
-__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8* src_rgb565,
- uint8* dst_argb,
+__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, 0x01080108 // generate multiplier to repeat 5 bits
@@ -537,8 +537,8 @@ __declspec(naked) void RGB565ToARGBRow_AVX2(const uint8* src_rgb565,
#endif // HAS_RGB565TOARGBROW_AVX2
#ifdef HAS_ARGB1555TOARGBROW_AVX2
-__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555,
- uint8* dst_argb,
+__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, 0x01080108 // generate multiplier to repeat 5 bits
@@ -589,8 +589,8 @@ __declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555,
#endif // HAS_ARGB1555TOARGBROW_AVX2
#ifdef HAS_ARGB4444TOARGBROW_AVX2
-__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444,
- uint8* dst_argb,
+__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
@@ -627,8 +627,8 @@ __declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444,
#endif // HAS_ARGB4444TOARGBROW_AVX2
// 24 instructions
-__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555,
- uint8* dst_argb,
+__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, 0x01080108 // generate multiplier to repeat 5 bits
@@ -680,8 +680,8 @@ __declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555,
}
// 18 instructions.
-__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444,
- uint8* dst_argb,
+__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
@@ -718,8 +718,8 @@ __declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444,
}
}
-__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -757,8 +757,8 @@ __declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8* src_argb,
}
}
-__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -796,8 +796,8 @@ __declspec(naked) void ARGBToRAWRow_SSSE3(const uint8* src_argb,
}
}
-__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -834,9 +834,9 @@ __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8* src_argb,
}
}
-__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width) {
__asm {
@@ -881,9 +881,9 @@ __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb,
}
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
-__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
- const uint32 dither4,
+__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -925,8 +925,8 @@ __declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb,
#endif // HAS_ARGBTORGB565DITHERROW_AVX2
// TODO(fbarchard): Improve sign extension/packing.
-__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -967,8 +967,8 @@ __declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8* src_argb,
}
}
-__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -998,8 +998,8 @@ __declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8* src_argb,
}
#ifdef HAS_ARGBTORGB565ROW_AVX2
-__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -1036,8 +1036,8 @@ __declspec(naked) void ARGBToRGB565Row_AVX2(const uint8* src_argb,
#endif // HAS_ARGBTORGB565ROW_AVX2
#ifdef HAS_ARGBTOARGB1555ROW_AVX2
-__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -1077,8 +1077,8 @@ __declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8* src_argb,
#endif // HAS_ARGBTOARGB1555ROW_AVX2
#ifdef HAS_ARGBTOARGB4444ROW_AVX2
-__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8* src_argb,
- uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -1109,8 +1109,8 @@ __declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8* src_argb,
#endif // HAS_ARGBTOARGB4444ROW_AVX2
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) void ARGBToYRow_SSSE3(const uint8* src_argb,
- uint8* dst_y,
+__declspec(naked) void ARGBToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -1145,8 +1145,8 @@ __declspec(naked) void ARGBToYRow_SSSE3(const uint8* src_argb,
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow but with different coefficients, no +16 bias, and rounding.
-__declspec(naked) void ARGBToYJRow_SSSE3(const uint8* src_argb,
- uint8* dst_y,
+__declspec(naked) void ARGBToYJRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -1185,8 +1185,8 @@ __declspec(naked) void ARGBToYJRow_SSSE3(const uint8* src_argb,
static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) void ARGBToYRow_AVX2(const uint8* src_argb,
- uint8* dst_y,
+__declspec(naked) void ARGBToYRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -1225,8 +1225,8 @@ __declspec(naked) void ARGBToYRow_AVX2(const uint8* src_argb,
#ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) void ARGBToYJRow_AVX2(const uint8* src_argb,
- uint8* dst_y,
+__declspec(naked) void ARGBToYJRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -1265,8 +1265,8 @@ __declspec(naked) void ARGBToYJRow_AVX2(const uint8* src_argb,
}
#endif // HAS_ARGBTOYJROW_AVX2
-__declspec(naked) void BGRAToYRow_SSSE3(const uint8* src_argb,
- uint8* dst_y,
+__declspec(naked) void BGRAToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -1299,8 +1299,8 @@ __declspec(naked) void BGRAToYRow_SSSE3(const uint8* src_argb,
}
}
-__declspec(naked) void ABGRToYRow_SSSE3(const uint8* src_argb,
- uint8* dst_y,
+__declspec(naked) void ABGRToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -1333,8 +1333,8 @@ __declspec(naked) void ABGRToYRow_SSSE3(const uint8* src_argb,
}
}
-__declspec(naked) void RGBAToYRow_SSSE3(const uint8* src_argb,
- uint8* dst_y,
+__declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -1367,10 +1367,10 @@ __declspec(naked) void RGBAToYRow_SSSE3(const uint8* src_argb,
}
}
-__declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -1439,10 +1439,10 @@ __declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0,
}
}
-__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -1513,10 +1513,10 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
}
#ifdef HAS_ARGBTOUVROW_AVX2
-__declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -1581,10 +1581,10 @@ __declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0,
#endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ARGBTOUVJROW_AVX2
-__declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -1649,9 +1649,9 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0,
}
#endif // HAS_ARGBTOUVJROW_AVX2
-__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -1707,10 +1707,10 @@ __declspec(naked) void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
}
}
-__declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -1779,10 +1779,10 @@ __declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0,
}
}
-__declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -1851,10 +1851,10 @@ __declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0,
}
}
-__declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0,
int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -2065,10 +2065,10 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) void I422ToARGBRow_AVX2(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2105,11 +2105,11 @@ __declspec(naked) void I422ToARGBRow_AVX2(
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
__declspec(naked) void I422AlphaToARGBRow_AVX2(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2148,10 +2148,10 @@ __declspec(naked) void I422AlphaToARGBRow_AVX2(
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) void I444ToARGBRow_AVX2(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2187,9 +2187,9 @@ __declspec(naked) void I444ToARGBRow_AVX2(
// 16 pixels.
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) void NV12ToARGBRow_AVX2(
- const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2222,9 +2222,9 @@ __declspec(naked) void NV12ToARGBRow_AVX2(
// 16 pixels.
// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) void NV21ToARGBRow_AVX2(
- const uint8* y_buf,
- const uint8* vu_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2257,8 +2257,8 @@ __declspec(naked) void NV21ToARGBRow_AVX2(
// 16 pixels.
// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
__declspec(naked) void YUY2ToARGBRow_AVX2(
- const uint8* src_yuy2,
- uint8* dst_argb,
+ const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2288,8 +2288,8 @@ __declspec(naked) void YUY2ToARGBRow_AVX2(
// 16 pixels.
// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
__declspec(naked) void UYVYToARGBRow_AVX2(
- const uint8* src_uyvy,
- uint8* dst_argb,
+ const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2319,10 +2319,10 @@ __declspec(naked) void UYVYToARGBRow_AVX2(
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
__declspec(naked) void I422ToRGBARow_AVX2(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2551,10 +2551,10 @@ __declspec(naked) void I422ToRGBARow_AVX2(
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) void I444ToARGBRow_SSSE3(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2588,10 +2588,10 @@ __declspec(naked) void I444ToARGBRow_SSSE3(
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
__declspec(naked) void I422ToRGB24Row_SSSE3(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgb24,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2626,10 +2626,10 @@ __declspec(naked) void I422ToRGB24Row_SSSE3(
// 8 pixels
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
__declspec(naked) void I422ToRGB565Row_SSSE3(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb565_buf,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* rgb565_buf,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2669,10 +2669,10 @@ __declspec(naked) void I422ToRGB565Row_SSSE3(
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) void I422ToARGBRow_SSSE3(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2706,11 +2706,11 @@ __declspec(naked) void I422ToARGBRow_SSSE3(
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB.
__declspec(naked) void I422AlphaToARGBRow_SSSE3(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- const uint8* a_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2746,9 +2746,9 @@ __declspec(naked) void I422AlphaToARGBRow_SSSE3(
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) void NV12ToARGBRow_SSSE3(
- const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2778,9 +2778,9 @@ __declspec(naked) void NV12ToARGBRow_SSSE3(
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) void NV21ToARGBRow_SSSE3(
- const uint8* y_buf,
- const uint8* vu_buf,
- uint8* dst_argb,
+ const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2810,8 +2810,8 @@ __declspec(naked) void NV21ToARGBRow_SSSE3(
// 8 pixels.
// 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
__declspec(naked) void YUY2ToARGBRow_SSSE3(
- const uint8* src_yuy2,
- uint8* dst_argb,
+ const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2838,8 +2838,8 @@ __declspec(naked) void YUY2ToARGBRow_SSSE3(
// 8 pixels.
// 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
__declspec(naked) void UYVYToARGBRow_SSSE3(
- const uint8* src_uyvy,
- uint8* dst_argb,
+ const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2864,10 +2864,10 @@ __declspec(naked) void UYVYToARGBRow_SSSE3(
}
__declspec(naked) void I422ToRGBARow_SSSE3(
- const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
@@ -2900,8 +2900,8 @@ __declspec(naked) void I422ToRGBARow_SSSE3(
#ifdef HAS_I400TOARGBROW_SSE2
// 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
-__declspec(naked) void I400ToARGBRow_SSE2(const uint8* y_buf,
- uint8* rgb_buf,
+__declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf,
+ uint8_t* rgb_buf,
int width) {
__asm {
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
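
A hedged scalar sketch of what this kernel computes: expand limited-range Y by roughly 1.164 and replicate it into B, G and R with opaque alpha. The 298/256 fixed-point form below is the textbook approximation, not the SIMD path's exact arrangement around the constant above, so rounding may differ slightly.

#include <stdint.h>

static void I400ToARGBScalar(const uint8_t* y_buf, uint8_t* rgb_buf, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int g = (298 * (y_buf[x] - 16) + 128) >> 8;  // ~1.164 gain, rounded
    uint8_t gray = (uint8_t)(g < 0 ? 0 : (g > 255 ? 255 : g));
    rgb_buf[0] = gray;  // B
    rgb_buf[1] = gray;  // G
    rgb_buf[2] = gray;  // R
    rgb_buf[3] = 255;   // A
    rgb_buf += 4;
  }
}
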
@@ -2947,8 +2947,8 @@ __declspec(naked) void I400ToARGBRow_SSE2(const uint8* y_buf,
#ifdef HAS_I400TOARGBROW_AVX2
// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
// note: vpunpcklbw mutates and vpackuswb unmutates.
-__declspec(naked) void I400ToARGBRow_AVX2(const uint8* y_buf,
- uint8* rgb_buf,
+__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf,
+ uint8_t* rgb_buf,
int width) {
__asm {
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
@@ -3000,8 +3000,8 @@ static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
// TODO(fbarchard): Replace lea with -16 offset.
-__declspec(naked) void MirrorRow_SSSE3(const uint8* src,
- uint8* dst,
+__declspec(naked) void MirrorRow_SSSE3(const uint8_t* src,
+ uint8_t* dst,
int width) {
__asm {
mov eax, [esp + 4] // src
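
The shuffle table above reverses 16 bytes per load; a scalar sketch of the same mirror:

#include <stdint.h>

static void MirrorRowScalar(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = src[width - 1 - x];  // write source bytes in reverse order
  }
}
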
@@ -3022,7 +3022,9 @@ __declspec(naked) void MirrorRow_SSSE3(const uint8* src,
#endif // HAS_MIRRORROW_SSSE3
#ifdef HAS_MIRRORROW_AVX2
-__declspec(naked) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
+__declspec(naked) void MirrorRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
@@ -3048,9 +3050,9 @@ __declspec(naked) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
-__declspec(naked) void MirrorUVRow_SSSE3(const uint8* src,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void MirrorUVRow_SSSE3(const uint8_t* src,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -3079,8 +3081,8 @@ __declspec(naked) void MirrorUVRow_SSSE3(const uint8* src,
#endif // HAS_MIRRORUVROW_SSSE3
#ifdef HAS_ARGBMIRRORROW_SSE2
-__declspec(naked) void ARGBMirrorRow_SSE2(const uint8* src,
- uint8* dst,
+__declspec(naked) void ARGBMirrorRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
int width) {
__asm {
mov eax, [esp + 4] // src
@@ -3105,8 +3107,8 @@ __declspec(naked) void ARGBMirrorRow_SSE2(const uint8* src,
// Shuffle table for reversing the bytes.
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
-__declspec(naked) void ARGBMirrorRow_AVX2(const uint8* src,
- uint8* dst,
+__declspec(naked) void ARGBMirrorRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
int width) {
__asm {
mov eax, [esp + 4] // src
@@ -3127,9 +3129,9 @@ __declspec(naked) void ARGBMirrorRow_AVX2(const uint8* src,
#endif // HAS_ARGBMIRRORROW_AVX2
#ifdef HAS_SPLITUVROW_SSE2
-__declspec(naked) void SplitUVRow_SSE2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void SplitUVRow_SSE2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -3167,9 +3169,9 @@ __declspec(naked) void SplitUVRow_SSE2(const uint8* src_uv,
#endif // HAS_SPLITUVROW_SSE2
#ifdef HAS_SPLITUVROW_AVX2
-__declspec(naked) void SplitUVRow_AVX2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void SplitUVRow_AVX2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -3207,9 +3209,9 @@ __declspec(naked) void SplitUVRow_AVX2(const uint8* src_uv,
#endif // HAS_SPLITUVROW_AVX2
#ifdef HAS_MERGEUVROW_SSE2
-__declspec(naked) void MergeUVRow_SSE2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+__declspec(naked) void MergeUVRow_SSE2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
__asm {
push edi
@@ -3239,9 +3241,9 @@ __declspec(naked) void MergeUVRow_SSE2(const uint8* src_u,
#endif // HAS_MERGEUVROW_SSE2
#ifdef HAS_MERGEUVROW_AVX2
-__declspec(naked) void MergeUVRow_AVX2(const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uv,
+__declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
int width) {
__asm {
push edi
@@ -3273,12 +3275,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8* src_u,
#endif // HAS_MERGEUVROW_AVX2
#ifdef HAS_COPYROW_SSE2
-// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
-__declspec(naked) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
+// CopyRow copies 'width' bytes using a 16-byte load/store, 32 bytes at a time.
+__declspec(naked) void CopyRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
test eax, 15
jne convertloopu
test edx, 15
@@ -3310,12 +3314,14 @@ __declspec(naked) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
#endif // HAS_COPYROW_SSE2
#ifdef HAS_COPYROW_AVX
-// CopyRow copys 'count' bytes using a 32 byte load/store, 64 bytes at time.
-__declspec(naked) void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
+// CopyRow copies 'width' bytes using a 32-byte load/store, 64 bytes at a time.
+__declspec(naked) void CopyRow_AVX(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
convertloop:
vmovdqu ymm0, [eax]
@@ -3334,13 +3340,15 @@ __declspec(naked) void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
#endif // HAS_COPYROW_AVX
// Multiple of 1.
-__declspec(naked) void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
+__declspec(naked) void CopyRow_ERMS(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
__asm {
mov eax, esi
mov edx, edi
mov esi, [esp + 4] // src
mov edi, [esp + 8] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
rep movsb
mov edi, edx
mov esi, eax
@@ -3350,13 +3358,13 @@ __declspec(naked) void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
// width in pixels
-__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8* src,
- uint8* dst,
+__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
pcmpeqb xmm0, xmm0 // generate mask 0xff000000
pslld xmm0, 24
pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
@@ -3387,13 +3395,13 @@ __declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8* src,
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
// width in pixels
-__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8* src,
- uint8* dst,
+__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
vpcmpeqb ymm0, ymm0, ymm0
vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
@@ -3417,8 +3425,8 @@ __declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8* src,
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
// width in pixels
-__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8* src_argb,
- uint8* dst_a,
+__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_a,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -3445,8 +3453,8 @@ __declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8* src_argb,
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
// width in pixels
-__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8* src_argb,
- uint8* dst_a,
+__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_a,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -3481,13 +3489,13 @@ __declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8* src_argb,
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
-__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8* src,
- uint8* dst,
+__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
pcmpeqb xmm0, xmm0 // generate mask 0xff000000
pslld xmm0, 24
pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
@@ -3520,13 +3528,13 @@ __declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8* src,
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
// width in pixels
-__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8* src,
- uint8* dst,
+__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
vpcmpeqb ymm0, ymm0, ymm0
vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
@@ -3551,16 +3559,16 @@ __declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8* src,
#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
#ifdef HAS_SETROW_X86
-// Write 'count' bytes using an 8 bit value repeated.
-// Count should be multiple of 4.
-__declspec(naked) void SetRow_X86(uint8* dst, uint8 v8, int count) {
+// Write 'width' bytes using an 8 bit value repeated.
+// width should be a multiple of 4.
+__declspec(naked) void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
__asm {
movzx eax, byte ptr [esp + 8] // v8
mov edx, 0x01010101 // Duplicate byte to all bytes.
mul edx // overwrites edx with upper part of result.
mov edx, edi
mov edi, [esp + 4] // dst
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
shr ecx, 2
rep stosd
mov edi, edx
@@ -3568,26 +3576,28 @@ __declspec(naked) void SetRow_X86(uint8* dst, uint8 v8, int count) {
}
}
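
The mul by 0x01010101 above splats the byte into all four lanes of eax so rep stosd can write four bytes per store. A scalar sketch of the same idea, assuming (as the comment requires) that width is a multiple of 4:

#include <stdint.h>
#include <string.h>

static void SetRowScalar(uint8_t* dst, uint8_t v8, int width) {
  uint32_t v32 = (uint32_t)v8 * 0x01010101u;  // splat byte to all 4 lanes
  int x;
  for (x = 0; x < width; x += 4) {
    memcpy(dst + x, &v32, 4);  // one 4-byte store, like rep stosd
  }
}
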
-// Write 'count' bytes using an 8 bit value repeated.
-__declspec(naked) void SetRow_ERMS(uint8* dst, uint8 v8, int count) {
+// Write 'width' bytes using an 8 bit value repeated.
+__declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
__asm {
mov edx, edi
mov edi, [esp + 4] // dst
mov eax, [esp + 8] // v8
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
rep stosb
mov edi, edx
ret
}
}
-// Write 'count' 32 bit values.
-__declspec(naked) void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
+// Write 'width' 32 bit values.
+__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb,
+ uint32_t v32,
+ int width) {
__asm {
mov edx, edi
mov edi, [esp + 4] // dst
mov eax, [esp + 8] // v32
- mov ecx, [esp + 12] // count
+ mov ecx, [esp + 12] // width
rep stosd
mov edi, edx
ret
@@ -3596,8 +3606,8 @@ __declspec(naked) void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
#endif // HAS_SETROW_X86
#ifdef HAS_YUY2TOYROW_AVX2
-__declspec(naked) void YUY2ToYRow_AVX2(const uint8* src_yuy2,
- uint8* dst_y,
+__declspec(naked) void YUY2ToYRow_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] // src_yuy2
@@ -3623,10 +3633,10 @@ __declspec(naked) void YUY2ToYRow_AVX2(const uint8* src_yuy2,
}
}
-__declspec(naked) void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
+__declspec(naked) void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -3669,9 +3679,9 @@ __declspec(naked) void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
}
}
-__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -3709,8 +3719,8 @@ __declspec(naked) void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
}
}
-__declspec(naked) void UYVYToYRow_AVX2(const uint8* src_uyvy,
- uint8* dst_y,
+__declspec(naked) void UYVYToYRow_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] // src_uyvy
@@ -3734,10 +3744,10 @@ __declspec(naked) void UYVYToYRow_AVX2(const uint8* src_uyvy,
}
}
-__declspec(naked) void UYVYToUVRow_AVX2(const uint8* src_uyvy,
+__declspec(naked) void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -3780,9 +3790,9 @@ __declspec(naked) void UYVYToUVRow_AVX2(const uint8* src_uyvy,
}
}
-__declspec(naked) void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -3822,8 +3832,8 @@ __declspec(naked) void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
#endif // HAS_YUY2TOYROW_AVX2
#ifdef HAS_YUY2TOYROW_SSE2
-__declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2,
- uint8* dst_y,
+__declspec(naked) void YUY2ToYRow_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] // src_yuy2
@@ -3847,10 +3857,10 @@ __declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2,
}
}
-__declspec(naked) void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
+__declspec(naked) void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
int stride_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -3892,9 +3902,9 @@ __declspec(naked) void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
}
}
-__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -3929,8 +3939,8 @@ __declspec(naked) void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
}
}
-__declspec(naked) void UYVYToYRow_SSE2(const uint8* src_uyvy,
- uint8* dst_y,
+__declspec(naked) void UYVYToYRow_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_y,
int width) {
__asm {
mov eax, [esp + 4] // src_uyvy
@@ -3952,10 +3962,10 @@ __declspec(naked) void UYVYToYRow_SSE2(const uint8* src_uyvy,
}
}
-__declspec(naked) void UYVYToUVRow_SSE2(const uint8* src_uyvy,
+__declspec(naked) void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
int stride_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push esi
@@ -3997,9 +4007,9 @@ __declspec(naked) void UYVYToUVRow_SSE2(const uint8* src_uyvy,
}
}
-__declspec(naked) void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u,
- uint8* dst_v,
+__declspec(naked) void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int width) {
__asm {
push edi
@@ -4041,10 +4051,10 @@ __declspec(naked) void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
// =((A2*C2)+(B2*(255-C2))+255)/256
// signed version of math
// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
-__declspec(naked) void BlendPlaneRow_SSSE3(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+__declspec(naked) void BlendPlaneRow_SSSE3(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width) {
__asm {
push esi
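
A scalar sketch of the unsigned formula quoted above, applied per byte:

#include <stdint.h>

static void BlendPlaneRowScalar(const uint8_t* src0, const uint8_t* src1,
                                const uint8_t* alpha, uint8_t* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // dst = (a*alpha + b*(255-alpha) + 255) / 256, as in the comment.
    dst[x] =
        (uint8_t)((src0[x] * alpha[x] + src1[x] * (255 - alpha[x]) + 255) >> 8);
  }
}
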
@@ -4098,10 +4108,10 @@ __declspec(naked) void BlendPlaneRow_SSSE3(const uint8* src0,
// =((A2*C2)+(B2*(255-C2))+255)/256
// signed version of math
// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
-__declspec(naked) void BlendPlaneRow_AVX2(const uint8* src0,
- const uint8* src1,
- const uint8* alpha,
- uint8* dst,
+__declspec(naked) void BlendPlaneRow_AVX2(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
int width) {
__asm {
push esi
@@ -4162,9 +4172,9 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
// Blend 8 pixels at a time.
-__declspec(naked) void ARGBBlendRow_SSSE3(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -4253,8 +4263,8 @@ static const uvec8 kShuffleAlpha1 = {
11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
};
-__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb0
@@ -4298,8 +4308,8 @@ __declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8* src_argb,
static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u,
128u, 128u, 14u, 15u, 14u, 15u,
14u, 15u, 128u, 128u};
-__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb0
@@ -4336,8 +4346,8 @@ __declspec(naked) void ARGBAttenuateRow_AVX2(const uint8* src_argb,
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
// Unattenuate 4 pixels at a time.
-__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
__asm {
push ebx
@@ -4392,8 +4402,8 @@ static const uvec8 kUnattenShuffleAlpha_AVX2 = {
// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
// USE_GATHER is not on by default, due to being a slow instruction.
#ifdef USE_GATHER
-__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb0
@@ -4426,8 +4436,8 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
}
}
#else // USE_GATHER
-__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
__asm {
@@ -4495,8 +4505,8 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
#ifdef HAS_ARGBGRAYROW_SSSE3
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels.
-__declspec(naked) void ARGBGrayRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBGrayRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -4552,7 +4562,7 @@ static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0,
24, 98, 50, 0, 24, 98, 50, 0};
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-__declspec(naked) void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
+__declspec(naked) void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) {
__asm {
mov eax, [esp + 4] /* dst_argb */
mov ecx, [esp + 8] /* width */
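
The sepia tables feed pmaddubsw with 7-bit fixed-point weights. A scalar sketch of the resulting transform; only the R row (kARGBToSepiaR) is visible in this hunk, so the B and G weight rows below are quoted by analogy and should be treated as illustrative:

#include <stdint.h>

static uint8_t Min255(int v) { return (uint8_t)(v > 255 ? 255 : v); }

static void ARGBSepiaScalar(uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0], g = dst_argb[1], r = dst_argb[2];
    dst_argb[0] = Min255((b * 17 + g * 68 + r * 35) >> 7);  // new B
    dst_argb[1] = Min255((b * 22 + g * 88 + r * 45) >> 7);  // new G
    dst_argb[2] = Min255((b * 24 + g * 98 + r * 50) >> 7);  // new R
    dst_argb += 4;  // alpha left unchanged
  }
}
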
@@ -4608,9 +4618,9 @@ __declspec(naked) void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
// Same as Sepia except matrix is provided.
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
-__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
- const int8* matrix_argb,
+__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
int width) {
__asm {
mov eax, [esp + 4] /* src_argb */
@@ -4670,7 +4680,7 @@ __declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8* src_argb,
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
-__declspec(naked) void ARGBQuantizeRow_SSE2(uint8* dst_argb,
+__declspec(naked) void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
int scale,
int interval_size,
int interval_offset,
@@ -4717,10 +4727,10 @@ __declspec(naked) void ARGBQuantizeRow_SSE2(uint8* dst_argb,
#ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value.
-__declspec(naked) void ARGBShadeRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- uint32 value) {
+ uint32_t value) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
@@ -4752,9 +4762,9 @@ __declspec(naked) void ARGBShadeRow_SSE2(const uint8* src_argb,
#ifdef HAS_ARGBMULTIPLYROW_SSE2
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -4792,9 +4802,9 @@ __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8* src_argb0,
#ifdef HAS_ARGBADDROW_SSE2
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
// TODO(fbarchard): Port this to posix, neon and other math functions.
-__declspec(naked) void ARGBAddRow_SSE2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -4841,9 +4851,9 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8* src_argb0,
#ifdef HAS_ARGBSUBTRACTROW_SSE2
// Subtract one row of ARGB pixels from another, 4 pixels at a time.
-__declspec(naked) void ARGBSubtractRow_SSE2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -4871,9 +4881,9 @@ __declspec(naked) void ARGBSubtractRow_SSE2(const uint8* src_argb0,
#ifdef HAS_ARGBMULTIPLYROW_AVX2
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -4909,9 +4919,9 @@ __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8* src_argb0,
#ifdef HAS_ARGBADDROW_AVX2
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) void ARGBAddRow_AVX2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -4939,9 +4949,9 @@ __declspec(naked) void ARGBAddRow_AVX2(const uint8* src_argb0,
#ifdef HAS_ARGBSUBTRACTROW_AVX2
// Subtract one row of ARGB pixels from another, 8 pixels at a time.
-__declspec(naked) void ARGBSubtractRow_AVX2(const uint8* src_argb0,
- const uint8* src_argb1,
- uint8* dst_argb,
+__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -4972,10 +4982,10 @@ __declspec(naked) void ARGBSubtractRow_AVX2(const uint8* src_argb0,
// -1 0 1
// -2 0 2
// -1 0 1
-__declspec(naked) void SobelXRow_SSE2(const uint8* src_y0,
- const uint8* src_y1,
- const uint8* src_y2,
- uint8* dst_sobelx,
+__declspec(naked) void SobelXRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
int width) {
__asm {
push esi
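
A scalar sketch of the kernel above: each output is the absolute value of the weighted column differences, clamped to 255. Like the SIMD kernel, it reads two bytes past width on each row (the caller guarantees that is safe).

#include <stdint.h>
#include <stdlib.h>

static void SobelXRowScalar(const uint8_t* src_y0, const uint8_t* src_y1,
                            const uint8_t* src_y2, uint8_t* dst_sobelx,
                            int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int a = src_y0[x] - src_y0[x + 2];  // top row,    weight 1
    int b = src_y1[x] - src_y1[x + 2];  // middle row, weight 2
    int c = src_y2[x] - src_y2[x + 2];  // bottom row, weight 1
    int sobel = abs(a + b * 2 + c);
    dst_sobelx[x] = (uint8_t)(sobel > 255 ? 255 : sobel);
  }
}
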
@@ -5030,9 +5040,9 @@ __declspec(naked) void SobelXRow_SSE2(const uint8* src_y0,
// -1 -2 -1
// 0 0 0
// 1 2 1
-__declspec(naked) void SobelYRow_SSE2(const uint8* src_y0,
- const uint8* src_y1,
- uint8* dst_sobely,
+__declspec(naked) void SobelYRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
int width) {
__asm {
push esi
@@ -5084,9 +5094,9 @@ __declspec(naked) void SobelYRow_SSE2(const uint8* src_y0,
// R = Sobel
// G = Sobel
// B = Sobel
-__declspec(naked) void SobelRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+__declspec(naked) void SobelRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
@@ -5132,9 +5142,9 @@ __declspec(naked) void SobelRow_SSE2(const uint8* src_sobelx,
#ifdef HAS_SOBELTOPLANEROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into a plane.
-__declspec(naked) void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_y,
+__declspec(naked) void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
int width) {
__asm {
push esi
@@ -5166,9 +5176,9 @@ __declspec(naked) void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
// R = Sobel X
// G = Sobel
// B = Sobel Y
-__declspec(naked) void SobelXYRow_SSE2(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst_argb,
+__declspec(naked) void SobelXYRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
int width) {
__asm {
push esi
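
A scalar sketch of the packing described above: R carries Sobel X, B carries Sobel Y, G their clamped sum, with opaque alpha.

#include <stdint.h>

static void SobelXYRowScalar(const uint8_t* src_sobelx,
                             const uint8_t* src_sobely, uint8_t* dst_argb,
                             int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int r = src_sobelx[x];
    int b = src_sobely[x];
    int g = r + b;
    dst_argb[0] = (uint8_t)b;                    // B = Sobel Y
    dst_argb[1] = (uint8_t)(g > 255 ? 255 : g);  // G = Sobel
    dst_argb[2] = (uint8_t)r;                    // R = Sobel X
    dst_argb[3] = 255;                           // A
    dst_argb += 4;
  }
}
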
@@ -5225,11 +5235,11 @@ __declspec(naked) void SobelXYRow_SSE2(const uint8* src_sobelx,
// count is the number of averaged pixels to produce.
// Does 4 pixels at a time.
// This function requires alignment on accumulation buffer pointers.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft,
- const int32* botleft,
+void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
+ const int32_t* botleft,
int width,
int area,
- uint8* dst,
+ uint8_t* dst,
int count) {
__asm {
mov eax, topleft // eax topleft
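
A scalar sketch of the averaging, under these assumptions: 4 channels per pixel, width is the box width in pixels, and topleft/botleft point at the box's top and bottom rows of the cumulative-sum table. The box sum falls out of four table corners, then gets scaled by a 16.16 fixed-point reciprocal of the area.

#include <stdint.h>

static void CumulativeSumToAverageScalar(const int32_t* topleft,
                                         const int32_t* botleft, int width,
                                         int area, uint8_t* dst, int count) {
  int ia = 65536 / area;  // 1/area in 16.16 fixed point
  int i, c;
  for (i = 0; i < count; ++i) {
    for (c = 0; c < 4; ++c) {
      int32_t sum = topleft[c] - topleft[width * 4 + c] - botleft[c] +
                    botleft[width * 4 + c];
      dst[c] = (uint8_t)((sum * ia) >> 16);
    }
    dst += 4;
    topleft += 4;
    botleft += 4;
  }
}
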
@@ -5375,9 +5385,9 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
// Creates a table of cumulative sums where each value is a sum of all values
// above and to the left of the value.
-void ComputeCumulativeSumRow_SSE2(const uint8* row,
- int32* cumsum,
- const int32* previous_cumsum,
+void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
int width) {
__asm {
mov eax, row
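
A scalar sketch of the row update: each entry is the running sum along this row plus the cumulative row above it, kept per channel.

#include <stdint.h>

static void ComputeCumulativeSumRowScalar(const uint8_t* row, int32_t* cumsum,
                                          const int32_t* previous_cumsum,
                                          int width) {
  int32_t sum[4] = {0, 0, 0, 0};  // running B, G, R, A sums for this row
  int x, c;
  for (x = 0; x < width; ++x) {
    for (c = 0; c < 4; ++c) {
      sum[c] += row[x * 4 + c];
      cumsum[x * 4 + c] = sum[c] + previous_cumsum[x * 4 + c];
    }
  }
}
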
@@ -5460,9 +5470,9 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row,
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
-__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb,
+__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb,
int src_argb_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
const float* uv_dudv,
int width) {
__asm {
@@ -5546,8 +5556,8 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb,
#ifdef HAS_INTERPOLATEROW_AVX2
// Bilinear filter 32x2 -> 32x1
-__declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr,
- const uint8* src_ptr,
+__declspec(naked) void InterpolateRow_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
@@ -5623,8 +5633,8 @@ __declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr,
// Bilinear filter 16x2 -> 16x1
// TODO(fbarchard): Consider allowing 256 using memcpy.
-__declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr,
- const uint8* src_ptr,
+__declspec(naked) void InterpolateRow_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
@@ -5705,9 +5715,9 @@ __declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr,
}
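
A scalar sketch of the bilinear row filter these kernels implement: blend two adjacent source rows with an 8-bit fraction (0 keeps row 0, 128 is an even mix), with round-to-nearest.

#include <stddef.h>
#include <stdint.h>

static void InterpolateRowScalar(uint8_t* dst_ptr, const uint8_t* src_ptr,
                                 ptrdiff_t src_stride, int width,
                                 int source_y_fraction) {
  const uint8_t* src_ptr1 = src_ptr + src_stride;  // the row below
  int y1 = source_y_fraction;
  int y0 = 256 - y1;
  int x;
  for (x = 0; x < width; ++x) {
    dst_ptr[x] = (uint8_t)((src_ptr[x] * y0 + src_ptr1[x] * y1 + 128) >> 8);
  }
}
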
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -5732,9 +5742,9 @@ __declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb,
}
#ifdef HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
- const uint8* shuffler,
+__declspec(naked) void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -5767,10 +5777,10 @@ __declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb,
// UYVY - Macro-pixel = 2 image pixels
// U0Y0V0Y1
-__declspec(naked) void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame,
+__declspec(naked) void I422ToYUY2Row_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
int width) {
__asm {
push esi
@@ -5804,10 +5814,10 @@ __declspec(naked) void I422ToYUY2Row_SSE2(const uint8* src_y,
}
}
-__declspec(naked) void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame,
+__declspec(naked) void I422ToUYVYRow_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
int width) {
__asm {
push esi
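
A scalar sketch of the UYVY packing named above (U0 Y0 V0 Y1 per macro-pixel; YUY2 is the same walk in Y0 U0 Y1 V0 order). Odd-width handling is omitted here.

#include <stdint.h>

static void I422ToUYVYRowScalar(const uint8_t* src_y, const uint8_t* src_u,
                                const uint8_t* src_v, uint8_t* dst_frame,
                                int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_u[0];  // U0
    dst_frame[1] = src_y[0];  // Y0
    dst_frame[2] = src_v[0];  // V0
    dst_frame[3] = src_y[1];  // Y1
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
}
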
@@ -5842,8 +5852,8 @@ __declspec(naked) void I422ToUYVYRow_SSE2(const uint8* src_y,
}
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
int width) {
__asm {
@@ -5901,8 +5911,8 @@ __declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb,
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
const float* poly,
int width) {
__asm {
@@ -5941,8 +5951,8 @@ __declspec(naked) void ARGBPolynomialRow_AVX2(const uint8* src_argb,
#ifdef HAS_HALFFLOATROW_SSE2
static float kExpBias = 1.9259299444e-34f;
-__declspec(naked) void HalfFloatRow_SSE2(const uint16* src,
- uint16* dst,
+__declspec(naked) void HalfFloatRow_SSE2(const uint16_t* src,
+ uint16_t* dst,
float scale,
int width) {
__asm {
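
The constant kExpBias above is 2^-112: multiplying by scale * 2^-112 lowers the float32 exponent by exactly the float32-to-float16 rebias (127 - 15 = 112), after which the half-float bits can be read straight out of the product. A hedged scalar sketch, valid for the non-negative, non-overflowing inputs these rows carry:

#include <stdint.h>
#include <string.h>

static uint16_t HalfFloatViaBias(uint16_t value, float scale) {
  float f = (float)value * scale * 1.9259299444e-34f;  // * 2^-112
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));  // reinterpret the float32 bits
  return (uint16_t)(bits >> 13);    // 23-bit mantissa -> 10-bit mantissa
}
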
@@ -5978,8 +5988,8 @@ __declspec(naked) void HalfFloatRow_SSE2(const uint16* src,
#endif // HAS_HALFFLOATROW_SSE2
#ifdef HAS_HALFFLOATROW_AVX2
-__declspec(naked) void HalfFloatRow_AVX2(const uint16* src,
- uint16* dst,
+__declspec(naked) void HalfFloatRow_AVX2(const uint16_t* src,
+ uint16_t* dst,
float scale,
int width) {
__asm {
@@ -6016,8 +6026,8 @@ __declspec(naked) void HalfFloatRow_AVX2(const uint16* src,
#endif // HAS_HALFFLOATROW_AVX2
#ifdef HAS_HALFFLOATROW_F16C
-__declspec(naked) void HalfFloatRow_F16C(const uint16* src,
- uint16* dst,
+__declspec(naked) void HalfFloatRow_F16C(const uint16_t* src,
+ uint16_t* dst,
float scale,
int width) {
__asm {
@@ -6050,8 +6060,8 @@ __declspec(naked) void HalfFloatRow_F16C(const uint16* src,
#ifdef HAS_ARGBCOLORTABLEROW_X86
// Transform ARGB pixels with color table.
-__declspec(naked) void ARGBColorTableRow_X86(uint8* dst_argb,
- const uint8* table_argb,
+__declspec(naked) void ARGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
int width) {
__asm {
push esi
@@ -6084,8 +6094,8 @@ __declspec(naked) void ARGBColorTableRow_X86(uint8* dst_argb,
#ifdef HAS_RGBCOLORTABLEROW_X86
// Transform RGB pixels with color table.
-__declspec(naked) void RGBColorTableRow_X86(uint8* dst_argb,
- const uint8* table_argb,
+__declspec(naked) void RGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
int width) {
__asm {
push esi
@@ -6116,11 +6126,11 @@ __declspec(naked) void RGBColorTableRow_X86(uint8* dst_argb,
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Transform RGB pixels with luma table.
-__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
- uint8* dst_argb,
+__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
int width,
- const uint8* luma,
- uint32 lumacoeff) {
+ const uint8_t* luma,
+ uint32_t lumacoeff) {
__asm {
push esi
push edi
diff --git a/chromium/third_party/libyuv/source/scale.cc b/chromium/third_party/libyuv/source/scale.cc
index 6951d8fb95e..2cfa1c6cb1c 100644
--- a/chromium/third_party/libyuv/source/scale.cc
+++ b/chromium/third_party/libyuv/source/scale.cc
@@ -39,12 +39,12 @@ static void ScalePlaneDown2(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) =
+ void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width) =
filtering == kFilterNone
? ScaleRowDown2_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_C
@@ -136,12 +136,12 @@ static void ScalePlaneDown2_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width) =
+ void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width) =
filtering == kFilterNone
? ScaleRowDown2_16_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
@@ -191,12 +191,12 @@ static void ScalePlaneDown4(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) =
+ void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
int row_stride = src_stride << 2;
(void)src_width;
@@ -258,12 +258,12 @@ static void ScalePlaneDown4_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width) =
+ void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
int row_stride = src_stride << 2;
(void)src_width;
@@ -302,14 +302,14 @@ static void ScalePlaneDown34(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
+ void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
(void)src_width;
(void)src_height;
@@ -411,14 +411,14 @@ static void ScalePlaneDown34_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
- void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
+ void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
(void)src_width;
(void)src_height;
@@ -497,14 +497,14 @@ static void ScalePlaneDown38(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
+ void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
assert(dst_width % 3 == 0);
(void)src_width;
@@ -606,14 +606,14 @@ static void ScalePlaneDown38_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
enum FilterMode filtering) {
int y;
- void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
- void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
+ void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
(void)src_width;
(void)src_height;
@@ -673,8 +673,8 @@ static void ScalePlaneDown38_16(int src_width,
#define MIN1(x) ((x) < 1 ? 1 : (x))
-static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
- uint32 sum = 0u;
+static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
+ uint32_t sum = 0u;
int x;
assert(iboxwidth > 0);
for (x = 0; x < iboxwidth; ++x) {
@@ -683,8 +683,8 @@ static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
return sum;
}
-static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
- uint32 sum = 0u;
+static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
+ uint32_t sum = 0u;
int x;
assert(iboxwidth > 0);
for (x = 0; x < iboxwidth; ++x) {
@@ -697,8 +697,8 @@ static void ScaleAddCols2_C(int dst_width,
int boxheight,
int x,
int dx,
- const uint16* src_ptr,
- uint8* dst_ptr) {
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr) {
int i;
int scaletbl[2];
int minboxwidth = dx >> 16;
@@ -719,8 +719,8 @@ static void ScaleAddCols2_16_C(int dst_width,
int boxheight,
int x,
int dx,
- const uint32* src_ptr,
- uint16* dst_ptr) {
+ const uint32_t* src_ptr,
+ uint16_t* dst_ptr) {
int i;
int scaletbl[2];
int minboxwidth = dx >> 16;
@@ -741,8 +741,8 @@ static void ScaleAddCols0_C(int dst_width,
int boxheight,
int x,
int dx,
- const uint16* src_ptr,
- uint8* dst_ptr) {
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr) {
int scaleval = 65536 / boxheight;
int i;
(void)dx;
@@ -756,8 +756,8 @@ static void ScaleAddCols1_C(int dst_width,
int boxheight,
int x,
int dx,
- const uint16* src_ptr,
- uint8* dst_ptr) {
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr) {
int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
@@ -772,8 +772,8 @@ static void ScaleAddCols1_16_C(int dst_width,
int boxheight,
int x,
int dx,
- const uint32* src_ptr,
- uint16* dst_ptr) {
+ const uint32_t* src_ptr,
+ uint16_t* dst_ptr) {
int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
@@ -796,8 +796,8 @@ static void ScalePlaneBox(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr) {
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -809,14 +809,14 @@ static void ScalePlaneBox(int src_width,
&dx, &dy);
src_width = Abs(src_width);
{
- // Allocate a row buffer of uint16.
+ // Allocate a row buffer of uint16_t.
align_buffer_64(row16, src_width * 2);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
- const uint16* src_ptr, uint8* dst_ptr) =
+ const uint16_t* src_ptr, uint8_t* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C
: ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
- void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
- ScaleAddRow_C;
+ void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
+ int src_width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleAddRow = ScaleAddRow_Any_SSE2;
@@ -853,7 +853,7 @@ static void ScalePlaneBox(int src_width,
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
- const uint8* src = src_ptr + iy * src_stride;
+ const uint8_t* src = src_ptr + iy * src_stride;
y += dy;
if (y > max_y) {
y = max_y;
@@ -861,10 +861,10 @@ static void ScalePlaneBox(int src_width,
boxheight = MIN1((y >> 16) - iy);
memset(row16, 0, src_width * 2);
for (k = 0; k < boxheight; ++k) {
- ScaleAddRow(src, (uint16*)(row16), src_width);
+ ScaleAddRow(src, (uint16_t*)(row16), src_width);
src += src_stride;
}
- ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row16);
@@ -877,8 +877,8 @@ static void ScalePlaneBox_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr) {
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -890,13 +890,13 @@ static void ScalePlaneBox_16(int src_width,
&dx, &dy);
src_width = Abs(src_width);
{
- // Allocate a row buffer of uint32.
+ // Allocate a row buffer of uint32_t.
align_buffer_64(row32, src_width * 4);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
- const uint32* src_ptr, uint16* dst_ptr) =
+ const uint32_t* src_ptr, uint16_t* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
- void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
- ScaleAddRow_16_C;
+ void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
+ int src_width) = ScaleAddRow_16_C;
#if defined(HAS_SCALEADDROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
@@ -907,7 +907,7 @@ static void ScalePlaneBox_16(int src_width,
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
- const uint16* src = src_ptr + iy * src_stride;
+ const uint16_t* src = src_ptr + iy * src_stride;
y += dy;
if (y > max_y) {
y = max_y;
@@ -915,10 +915,10 @@ static void ScalePlaneBox_16(int src_width,
boxheight = MIN1((y >> 16) - iy);
memset(row32, 0, src_width * 4);
for (k = 0; k < boxheight; ++k) {
- ScaleAddRow(src, (uint32*)(row32), src_width);
+ ScaleAddRow(src, (uint32_t*)(row32), src_width);
src += src_stride;
}
- ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row32);
@@ -932,8 +932,8 @@ void ScalePlaneBilinearDown(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -946,10 +946,10 @@ void ScalePlaneBilinearDown(int src_width,
const int max_y = (src_height - 1) << 16;
int j;
- void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width,
- int x, int dx) =
+ void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
- void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1016,7 +1016,7 @@ void ScalePlaneBilinearDown(int src_width,
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
- const uint8* src = src_ptr + yi * src_stride;
+ const uint8_t* src = src_ptr + yi * src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
@@ -1039,8 +1039,8 @@ void ScalePlaneBilinearDown_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1053,10 +1053,10 @@ void ScalePlaneBilinearDown_16(int src_width,
const int max_y = (src_height - 1) << 16;
int j;
- void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr,
+ void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
- void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr,
+ void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1107,13 +1107,13 @@ void ScalePlaneBilinearDown_16(int src_width,
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
- const uint16* src = src_ptr + yi * src_stride;
+ const uint16_t* src = src_ptr + yi * src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
int yf = (y >> 8) & 255;
- InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
- ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
+ InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
+ ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
}
dst_ptr += dst_stride;
y += dy;
@@ -1131,8 +1131,8 @@ void ScalePlaneBilinearUp(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
@@ -1141,11 +1141,11 @@ void ScalePlaneBilinearUp(int src_width,
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width,
- int x, int dx) =
+ void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_C : ScaleCols_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
&dx, &dy);
@@ -1214,13 +1214,13 @@ void ScalePlaneBilinearUp(int src_width,
}
{
int yi = y >> 16;
- const uint8* src = src_ptr + yi * src_stride;
+ const uint8_t* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
- uint8* rowptr = row;
+ uint8_t* rowptr = row;
int rowstride = kRowSize;
int lasty = yi;
@@ -1266,8 +1266,8 @@ void ScalePlaneBilinearUp_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
@@ -1276,10 +1276,10 @@ void ScalePlaneBilinearUp_16(int src_width,
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr,
+ void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
- void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr,
+ void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1341,13 +1341,13 @@ void ScalePlaneBilinearUp_16(int src_width,
}
{
int yi = y >> 16;
- const uint16* src = src_ptr + yi * src_stride;
+ const uint16_t* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4);
- uint16* rowptr = (uint16*)row;
+ uint16_t* rowptr = (uint16_t*)row;
int rowstride = kRowSize;
int lasty = yi;
@@ -1398,11 +1398,11 @@ static void ScalePlaneSimple(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_ptr,
- uint8* dst_ptr) {
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
int i;
- void (*ScaleCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, int x,
- int dx) = ScaleCols_C;
+ void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
+ int x, int dx) = ScaleCols_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -1434,10 +1434,10 @@ static void ScalePlaneSimple_16(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_ptr,
- uint16* dst_ptr) {
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
int i;
- void (*ScaleCols)(uint16 * dst_ptr, const uint16* src_ptr, int dst_width,
+ void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
int x, int dx) = ScaleCols_16_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1468,11 +1468,11 @@ static void ScalePlaneSimple_16(int src_width,
// This function dispatches to a specialized scaler based on scale factor.
LIBYUV_API
-void ScalePlane(const uint8* src,
+void ScalePlane(const uint8_t* src,
int src_stride,
int src_width,
int src_height,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int dst_width,
int dst_height,
@@ -1551,11 +1551,11 @@ void ScalePlane(const uint8* src,
}
LIBYUV_API
-void ScalePlane_16(const uint16* src,
+void ScalePlane_16(const uint16_t* src,
int src_stride,
int src_width,
int src_height,
- uint16* dst,
+ uint16_t* dst,
int dst_stride,
int dst_width,
int dst_height,
@@ -1637,19 +1637,19 @@ void ScalePlane_16(const uint16* src,
// This function in turn calls a scaling function for each plane.
LIBYUV_API
-int I420Scale(const uint8* src_y,
+int I420Scale(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
- uint8* dst_y,
+ uint8_t* dst_y,
int dst_stride_y,
- uint8* dst_u,
+ uint8_t* dst_u,
int dst_stride_u,
- uint8* dst_v,
+ uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
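
As a usage sketch for the retyped API (a contiguous I420 buffer with even dimensions is assumed; the helper name and layout are illustrative, not part of this patch): halve a frame with box filtering.

#include <stdint.h>
#include "libyuv/scale.h"

int HalveI420(const uint8_t* src, int src_width, int src_height,
              uint8_t* dst) {
  int dst_width = src_width / 2, dst_height = src_height / 2;
  const uint8_t* src_u = src + src_width * src_height;
  const uint8_t* src_v = src_u + (src_width / 2) * (src_height / 2);
  uint8_t* dst_u = dst + dst_width * dst_height;
  uint8_t* dst_v = dst_u + (dst_width / 2) * (dst_height / 2);
  return I420Scale(src, src_width, src_u, src_width / 2, src_v, src_width / 2,
                   src_width, src_height, dst, dst_width, dst_u, dst_width / 2,
                   dst_v, dst_width / 2, dst_width, dst_height, kFilterBox);
}
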
@@ -1674,19 +1674,19 @@ int I420Scale(const uint8* src_y,
}
LIBYUV_API
-int I420Scale_16(const uint16* src_y,
+int I420Scale_16(const uint16_t* src_y,
int src_stride_y,
- const uint16* src_u,
+ const uint16_t* src_u,
int src_stride_u,
- const uint16* src_v,
+ const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
- uint16* dst_y,
+ uint16_t* dst_y,
int dst_stride_y,
- uint16* dst_u,
+ uint16_t* dst_u,
int dst_stride_u,
- uint16* dst_v,
+ uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
@@ -1712,17 +1712,17 @@ int I420Scale_16(const uint16* src_y,
// Deprecated API
LIBYUV_API
-int Scale(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
+int Scale(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
int src_stride_y,
int src_stride_u,
int src_stride_v,
int src_width,
int src_height,
- uint8* dst_y,
- uint8* dst_u,
- uint8* dst_v,
+ uint8_t* dst_y,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
int dst_stride_y,
int dst_stride_u,
int dst_stride_v,
@@ -1735,43 +1735,6 @@ int Scale(const uint8* src_y,
dst_height, interpolate ? kFilterBox : kFilterNone);
}
-// Deprecated api
-LIBYUV_API
-int ScaleOffset(const uint8* src,
- int src_width,
- int src_height,
- uint8* dst,
- int dst_width,
- int dst_height,
- int dst_yoffset,
- LIBYUV_BOOL interpolate) {
- // Chroma requires offset to multiple of 2.
- int dst_yoffset_even = dst_yoffset & ~1;
- int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
- int src_halfheight = SUBSAMPLE(src_height, 1, 1);
- int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
- int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
- int aheight = dst_height - dst_yoffset_even * 2; // actual output height
- const uint8* src_y = src;
- const uint8* src_u = src + src_width * src_height;
- const uint8* src_v =
- src + src_width * src_height + src_halfwidth * src_halfheight;
- uint8* dst_y = dst + dst_yoffset_even * dst_width;
- uint8* dst_u =
- dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth;
- uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
- (dst_yoffset_even >> 1) * dst_halfwidth;
- if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 ||
- dst_height <= 0 || dst_yoffset_even < 0 ||
- dst_yoffset_even >= dst_height) {
- return -1;
- }
- return I420Scale(src_y, src_width, src_u, src_halfwidth, src_v, src_halfwidth,
- src_width, src_height, dst_y, dst_width, dst_u,
- dst_halfwidth, dst_v, dst_halfwidth, dst_width, aheight,
- interpolate ? kFilterBox : kFilterNone);
-}
-
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/chromium/third_party/libyuv/source/scale_any.cc b/chromium/third_party/libyuv/source/scale_any.cc
index 8604c233859..53ad1364049 100644
--- a/chromium/third_party/libyuv/source/scale_any.cc
+++ b/chromium/third_party/libyuv/source/scale_any.cc
@@ -19,15 +19,15 @@ extern "C" {
#endif
// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
-#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
- void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, \
- int dx) { \
- int r = dst_width & MASK; \
- int n = dst_width & ~MASK; \
- if (n > 0) { \
- TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
- } \
- TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \
+#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
+ void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
+ int dx) { \
+ int r = dst_width & MASK; \
+ int n = dst_width & ~MASK; \
+ if (n > 0) { \
+ TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
+ } \
+ TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \
}
#ifdef HAS_SCALEFILTERCOLS_NEON
@@ -60,31 +60,31 @@ CANY(ScaleARGBFilterCols_Any_MSA,
// Fixed scale down.
// Mask may be non-power of 2, so use MOD
-#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \
- int dst_width) { \
- int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
- int n = dst_width - r; \
- if (n > 0) { \
- SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
- } \
- SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
- dst_ptr + n * BPP, r); \
+#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
+ int dst_width) { \
+ int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
+ int n = dst_width - r; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
+ dst_ptr + n * BPP, r); \
}
// Fixed scale down for odd source width. Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
-#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \
- int dst_width) { \
- int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
- int n = (dst_width - 1) - r; \
- if (n > 0) { \
- SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
- } \
- SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
- dst_ptr + n * BPP, r + 1); \
+#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
+ int dst_width) { \
+ int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
+ int n = (dst_width - 1) - r; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
+ dst_ptr + n * BPP, r + 1); \
}
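
For illustration, a hypothetical SDODD instantiation (kernel names assumed, mirroring the real ones that follow): the wrapper runs the SIMD body on the aligned prefix of dst_width - 1 and lets the C kernel finish the remainder plus the duplicated last pixel.

// Hypothetical example of how this file wires up an odd-width wrapper:
SDODD(ScaleRowDown2Box_Odd_SSSE3, /* wrapper the scaler will call        */
      ScaleRowDown2Box_SSSE3,     /* SIMD kernel, 16-pixel granularity   */
      ScaleRowDown2Box_Odd_C,     /* C tail: remainder + last pixel copy */
      2 /* FACTOR */, 1 /* BPP */, 15 /* MASK */)
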
#ifdef HAS_SCALEROWDOWN2_SSSE3
@@ -385,16 +385,16 @@ SDANY(ScaleARGBRowDown2Box_Any_MSA,
#undef SDANY
// Scale down by even scale factor.
-#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
- uint8* dst_ptr, int dst_width) { \
- int r = dst_width & MASK; \
- int n = dst_width & ~MASK; \
- if (n > 0) { \
- SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
- } \
- SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
- dst_ptr + n * BPP, r); \
+#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
+ uint8_t* dst_ptr, int dst_width) { \
+ int r = dst_width & MASK; \
+ int n = dst_width & ~MASK; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
+ dst_ptr + n * BPP, r); \
}
#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
@@ -435,13 +435,13 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
#endif
// Add rows for box filter scale down.
-#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
- void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
- int n = src_width & ~MASK; \
- if (n > 0) { \
- SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
- } \
- SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
+#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
+ int n = src_width & ~MASK; \
+ if (n > 0) { \
+ SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
}
#ifdef HAS_SCALEADDROW_SSE2
diff --git a/chromium/third_party/libyuv/source/scale_argb.cc b/chromium/third_party/libyuv/source/scale_argb.cc
index cd4683b37be..53a22e8b41e 100644
--- a/chromium/third_party/libyuv/source/scale_argb.cc
+++ b/chromium/third_party/libyuv/source/scale_argb.cc
@@ -36,8 +36,8 @@ static void ScaleARGBDown2(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int dx,
int y,
@@ -45,8 +45,8 @@ static void ScaleARGBDown2(int src_width,
enum FilterMode filtering) {
int j;
int row_stride = src_stride * (dy >> 16);
- void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) =
+ void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
+ uint8_t* dst_argb, int dst_width) =
filtering == kFilterNone
? ScaleARGBRowDown2_C
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
@@ -131,8 +131,8 @@ static void ScaleARGBDown4Box(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int dx,
int y,
@@ -142,8 +142,8 @@ static void ScaleARGBDown4Box(int src_width,
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16);
- void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) =
+ void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
+ uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
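The kRowSize expression rounds an intermediate row up to a 32-byte multiple so the two temporary rows stay SIMD-friendly; the 4x box is then effectively computed as two 2x box passes over those rows. Sizing sketch:

#include <assert.h>
int main(void) {
  int dst_width = 33;  // arbitrary example
  const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;  // 2x width, 4 B/pixel
  assert(kRowSize == 288);               // 264 rounded up to a 32-byte multiple
  assert(kRowSize >= dst_width * 2 * 4);
  return 0;
}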
@@ -189,8 +189,8 @@ static void ScaleARGBDownEven(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int dx,
int y,
@@ -199,8 +199,8 @@ static void ScaleARGBDownEven(int src_width,
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * src_stride;
- void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
- int src_step, uint8* dst_argb, int dst_width) =
+ void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
+ int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
(void)src_width;
(void)src_height;
@@ -255,23 +255,23 @@ static void ScaleARGBBilinearDown(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int dx,
int y,
int dy,
enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
- int64 xlast = x + (int64)(dst_width - 1) * dx;
- int64 xl = (dx >= 0) ? x : xlast;
- int64 xr = (dx >= 0) ? xlast : x;
+ int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
+ int64_t xl = (dx >= 0) ? x : xlast;
+ int64_t xr = (dx >= 0) ? xlast : x;
int clip_src_width;
xl = (xl >> 16) & ~3; // Left edge aligned.
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
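The xl/xr computation bounds which source pixels a whole output row can touch, in 16.16 fixed point, so only that window needs to be addressable. A worked instance:

#include <assert.h>
#include <stdint.h>
int main(void) {
  int x = 0x8000;    // start at source position 0.5
  int dx = 0x20000;  // 2.0 source pixels per output pixel
  int dst_width = 10;
  int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
  int64_t xl = (dx >= 0) ? x : xlast;
  int64_t xr = (dx >= 0) ? xlast : x;
  xl = (xl >> 16) & ~3;  // left edge aligned down to a multiple of 4
  xr = (xr >> 16) + 1;   // rightmost pixel read; bilinear touches 2 pixels
  assert(xl == 0 && xr == 19);
  return 0;
}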
@@ -346,7 +346,7 @@ static void ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
- const uint8* src = src_argb + yi * src_stride;
+ const uint8_t* src = src_argb + yi * src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@@ -371,18 +371,18 @@ static void ScaleARGBBilinearUp(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int dx,
int y,
int dy,
enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
@@ -479,13 +479,13 @@ static void ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
- const uint8* src = src_argb + yi * src_stride;
+ const uint8_t* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
- uint8* rowptr = row;
+ uint8_t* rowptr = row;
int rowstride = kRowSize;
int lasty = yi;
@@ -535,18 +535,18 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
int src_stride_u,
int src_stride_v,
int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
int x,
int dx,
int y,
int dy,
enum FilterMode filtering) {
int j;
- void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
- const uint8* v_buf, uint8* rgb_buf, int width) =
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
@@ -581,7 +581,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
#endif
- void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
@@ -617,7 +617,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
#endif
- void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
if (src_width >= 32768) {
@@ -682,9 +682,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
- const uint8* src_row_y = src_y + yi * src_stride_y;
- const uint8* src_row_u = src_u + uv_yi * src_stride_u;
- const uint8* src_row_v = src_v + uv_yi * src_stride_v;
+ const uint8_t* src_row_y = src_y + yi * src_stride_y;
+ const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
+ const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 31) & ~31;
@@ -693,7 +693,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
// Allocate 1 row of ARGB for source conversion.
align_buffer_64(argb_row, src_width * 4);
- uint8* rowptr = row;
+ uint8_t* rowptr = row;
int rowstride = kRowSize;
int lasty = yi;
@@ -765,15 +765,15 @@ static void ScaleARGBSimple(int src_width,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int dx,
int y,
int dy) {
int j;
- void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width,
- int x, int dx) =
+ void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
(void)src_height;
#if defined(HAS_SCALEARGBCOLS_SSE2)
@@ -817,11 +817,11 @@ static void ScaleARGBSimple(int src_width,
// Scale an ARGB image.
// This function dispatches to a scaling function
// suited to the requested resolutions.
-static void ScaleARGB(const uint8* src,
+static void ScaleARGB(const uint8_t* src,
int src_stride,
int src_width,
int src_height,
- uint8* dst,
+ uint8_t* dst,
int dst_stride,
int dst_width,
int dst_height,
@@ -850,13 +850,13 @@ static void ScaleARGB(const uint8* src,
&dx, &dy);
src_width = Abs(src_width);
if (clip_x) {
- int64 clipf = (int64)(clip_x)*dx;
+ int64_t clipf = (int64_t)(clip_x)*dx;
x += (clipf & 0xffff);
src += (clipf >> 16) * 4;
dst += clip_x * 4;
}
if (clip_y) {
- int64 clipf = (int64)(clip_y)*dy;
+ int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * src_stride;
dst += clip_y * dst_stride;
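Clip handling folds the skipped output pixels back into the fixed-point source position: the integer part of clip * step advances the source pointer, the fraction stays in x or y. Worked example for the x axis:

#include <assert.h>
#include <stdint.h>
int main(void) {
  int x = 0, clip_x = 3;
  int dx = 0x18000;                      // 1.5 source pixels per output pixel
  int64_t clipf = (int64_t)(clip_x)*dx;  // 4.5 source pixels == 0x48000
  x += (int)(clipf & 0xffff);            // keep the 0.5 fraction
  int skip_pixels = (int)(clipf >> 16);  // skip 4 whole source pixels
  assert(x == 0x8000 && skip_pixels == 4);
  return 0;
}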
@@ -922,11 +922,11 @@ static void ScaleARGB(const uint8* src,
}
LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb,
+int ARGBScaleClip(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
@@ -950,11 +950,11 @@ int ARGBScaleClip(const uint8* src_argb,
// Scale an ARGB image.
LIBYUV_API
-int ARGBScale(const uint8* src_argb,
+int ARGBScale(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
@@ -971,18 +971,18 @@ int ARGBScale(const uint8* src_argb,
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
-int YUVToARGBScaleClip(const uint8* src_y,
+int YUVToARGBScaleClip(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint32 src_fourcc,
+ uint32_t src_fourcc,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- uint32 dst_fourcc,
+ uint32_t dst_fourcc,
int dst_width,
int dst_height,
int clip_x,
@@ -990,7 +990,7 @@ int YUVToARGBScaleClip(const uint8* src_y,
int clip_width,
int clip_height,
enum FilterMode filtering) {
- uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
+ uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
int r;
(void)src_fourcc; // TODO(fbarchard): implement and/or assert.
(void)dst_fourcc;
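YUVToARGBScaleClip converts the whole source to ARGB into a temporary src_width * src_height * 4 buffer before scaling, so it is memory-hungry for large frames. A caller-side sketch (dimensions and the wrapper name are illustrative, not from the tree):

#include <stdint.h>
#include "libyuv/scale_argb.h"
#include "libyuv/video_common.h"

// Scale an I420 frame, producing only the top-left quadrant of the
// scaled ARGB output.
int ScaleI420ToArgbQuadrant(const uint8_t* y, int y_stride,
                            const uint8_t* u, int u_stride,
                            const uint8_t* v, int v_stride,
                            int src_w, int src_h,
                            uint8_t* argb, int argb_stride,
                            int dst_w, int dst_h) {
  return YUVToARGBScaleClip(y, y_stride, u, u_stride, v, v_stride,
                            FOURCC_I420, src_w, src_h,
                            argb, argb_stride, FOURCC_ARGB, dst_w, dst_h,
                            0, 0, dst_w / 2, dst_h / 2, kFilterBilinear);
}

Note that src_fourcc and dst_fourcc are currently ignored, per the TODO above.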
diff --git a/chromium/third_party/libyuv/source/scale_common.cc b/chromium/third_party/libyuv/source/scale_common.cc
index e060c3cb8d9..b28d7da41fc 100644
--- a/chromium/third_party/libyuv/source/scale_common.cc
+++ b/chromium/third_party/libyuv/source/scale_common.cc
@@ -28,9 +28,9 @@ static __inline int Abs(int v) {
}
// CPU agnostic row functions
-void ScaleRowDown2_C(const uint8* src_ptr,
+void ScaleRowDown2_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -45,9 +45,9 @@ void ScaleRowDown2_C(const uint8* src_ptr,
}
}
-void ScaleRowDown2_16_C(const uint16* src_ptr,
+void ScaleRowDown2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -62,11 +62,11 @@ void ScaleRowDown2_16_C(const uint16* src_ptr,
}
}
-void ScaleRowDown2Linear_C(const uint8* src_ptr,
+void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
- const uint8* s = src_ptr;
+ const uint8_t* s = src_ptr;
int x;
(void)src_stride;
for (x = 0; x < dst_width - 1; x += 2) {
@@ -80,11 +80,11 @@ void ScaleRowDown2Linear_C(const uint8* src_ptr,
}
}
-void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
+void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
- const uint16* s = src_ptr;
+ const uint16_t* s = src_ptr;
int x;
(void)src_stride;
for (x = 0; x < dst_width - 1; x += 2) {
@@ -98,12 +98,12 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
}
}
-void ScaleRowDown2Box_C(const uint8* src_ptr,
+void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
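The +2 before >> 2 is the usual round-to-nearest bias for a 2x2 box average (and +1 before >> 1 likewise for the 2-tap cases). Quick check:

#include <assert.h>
#include <stdint.h>
int main(void) {
  uint8_t s0 = 10, s1 = 11, t0 = 11, t1 = 11;    // true mean 10.75
  assert(((s0 + s1 + t0 + t1 + 2) >> 2) == 11);  // rounds to nearest
  assert(((s0 + s1 + t0 + t1) >> 2) == 10);      // plain shift truncates
  return 0;
}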
@@ -117,12 +117,12 @@ void ScaleRowDown2Box_C(const uint8* src_ptr,
}
}
-void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
+void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
int x;
dst_width -= 1;
for (x = 0; x < dst_width - 1; x += 2) {
@@ -141,12 +141,12 @@ void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
dst[0] = (s[0] + t[0] + 1) >> 1;
}
-void ScaleRowDown2Box_16_C(const uint16* src_ptr,
+void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
- const uint16* s = src_ptr;
- const uint16* t = src_ptr + src_stride;
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
@@ -160,9 +160,9 @@ void ScaleRowDown2Box_16_C(const uint16* src_ptr,
}
}
-void ScaleRowDown4_C(const uint8* src_ptr,
+void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -177,9 +177,9 @@ void ScaleRowDown4_C(const uint8* src_ptr,
}
}
-void ScaleRowDown4_16_C(const uint16* src_ptr,
+void ScaleRowDown4_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -194,9 +194,9 @@ void ScaleRowDown4_16_C(const uint16* src_ptr,
}
}
-void ScaleRowDown4Box_C(const uint8* src_ptr,
+void ScaleRowDown4Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
intptr_t stride = src_stride;
int x;
@@ -232,9 +232,9 @@ void ScaleRowDown4Box_C(const uint8* src_ptr,
}
}
-void ScaleRowDown4Box_16_C(const uint16* src_ptr,
+void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
intptr_t stride = src_stride;
int x;
@@ -270,9 +270,9 @@ void ScaleRowDown4Box_16_C(const uint16* src_ptr,
}
}
-void ScaleRowDown34_C(const uint8* src_ptr,
+void ScaleRowDown34_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -286,9 +286,9 @@ void ScaleRowDown34_C(const uint8* src_ptr,
}
}
-void ScaleRowDown34_16_C(const uint16* src_ptr,
+void ScaleRowDown34_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -303,21 +303,21 @@ void ScaleRowDown34_16_C(const uint16* src_ptr,
}
// Filter rows 0 and 1 together, 3 : 1
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* d,
+ uint8_t* d,
int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
- uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 * 3 + b0 + 2) >> 2;
d[1] = (a1 * 3 + b1 + 2) >> 2;
d[2] = (a2 * 3 + b2 + 2) >> 2;
@@ -327,21 +327,21 @@ void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
}
}
-void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* d,
+ uint16_t* d,
int dst_width) {
- const uint16* s = src_ptr;
- const uint16* t = src_ptr + src_stride;
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
- uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 * 3 + b0 + 2) >> 2;
d[1] = (a1 * 3 + b1 + 2) >> 2;
d[2] = (a2 * 3 + b2 + 2) >> 2;
@@ -352,21 +352,21 @@ void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
}
// Filter rows 1 and 2 together, 1 : 1
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* d,
+ uint8_t* d,
int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
- uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 + b0 + 1) >> 1;
d[1] = (a1 + b1 + 1) >> 1;
d[2] = (a2 + b2 + 1) >> 1;
@@ -376,21 +376,21 @@ void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
}
}
-void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* d,
+ uint16_t* d,
int dst_width) {
- const uint16* s = src_ptr;
- const uint16* t = src_ptr + src_stride;
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
- uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 + b0 + 1) >> 1;
d[1] = (a1 + b1 + 1) >> 1;
d[2] = (a2 + b2 + 1) >> 1;
@@ -401,8 +401,8 @@ void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
}
// Scales a single row of pixels using point sampling.
-void ScaleCols_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -419,8 +419,8 @@ void ScaleCols_C(uint8* dst_ptr,
}
}
-void ScaleCols_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -438,8 +438,8 @@ void ScaleCols_16_C(uint16* dst_ptr,
}
// Scales a single row of pixels up by 2x using point sampling.
-void ScaleColsUp2_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleColsUp2_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -456,8 +456,8 @@ void ScaleColsUp2_C(uint8* dst_ptr,
}
}
-void ScaleColsUp2_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleColsUp2_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -477,15 +477,15 @@ void ScaleColsUp2_16_C(uint16* dst_ptr,
// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) \
- (uint8)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
+ (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
- (uint8)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
+ (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif
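The two blenders trade precision for each instruction set's sweet spot: the ARM form keeps the full 16-bit fraction, while the Intel form drops to 7 bits (f >> 9) with a 0x40 rounding term to match its SIMD path. They agree to within about 1 LSB:

#include <assert.h>
#include <stdint.h>
int main(void) {
  int a = 100, b = 200;
  int f = 0x8000;  // fraction 0.5 in 16.16
  int arm = (uint8_t)(a + (((f * (b - a)) + 0x8000) >> 16));
  int intel = (uint8_t)(a + ((((f >> 9) * (b - a)) + 0x40) >> 7));
  assert(arm == 150 && intel == 150);
  return 0;
}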
-void ScaleFilterCols_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -511,15 +511,15 @@ void ScaleFilterCols_C(uint8* dst_ptr,
}
}
-void ScaleFilterCols64_C(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols64_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x32,
int dx) {
- int64 x = (int64)(x32);
+ int64_t x = (int64_t)(x32);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
+ int64_t xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
@@ -532,7 +532,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr,
dst_ptr += 2;
}
if (dst_width & 1) {
- int64 xi = x >> 16;
+ int64_t xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
@@ -540,12 +540,12 @@ void ScaleFilterCols64_C(uint8* dst_ptr,
}
#undef BLENDER
-// Same as 8 bit arm blender but return is cast to uint16
+// Same as 8 bit arm blender but return is cast to uint16_t
#define BLENDER(a, b, f) \
- (uint16)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
+ (uint16_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
-void ScaleFilterCols_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleFilterCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -571,15 +571,15 @@ void ScaleFilterCols_16_C(uint16* dst_ptr,
}
}
-void ScaleFilterCols64_16_C(uint16* dst_ptr,
- const uint16* src_ptr,
+void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
int dst_width,
int x32,
int dx) {
- int64 x = (int64)(x32);
+ int64_t x = (int64_t)(x32);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
+ int64_t xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
@@ -592,7 +592,7 @@ void ScaleFilterCols64_16_C(uint16* dst_ptr,
dst_ptr += 2;
}
if (dst_width & 1) {
- int64 xi = x >> 16;
+ int64_t xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
@@ -600,9 +600,9 @@ void ScaleFilterCols64_16_C(uint16* dst_ptr,
}
#undef BLENDER
-void ScaleRowDown38_C(const uint8* src_ptr,
+void ScaleRowDown38_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -616,9 +616,9 @@ void ScaleRowDown38_C(const uint8* src_ptr,
}
}
-void ScaleRowDown38_16_C(const uint16* src_ptr,
+void ScaleRowDown38_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -633,9 +633,9 @@ void ScaleRowDown38_16_C(const uint16* src_ptr,
}
// 8x3 -> 3x1
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
+void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
int i;
@@ -663,9 +663,9 @@ void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
}
}
-void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst_ptr,
+ uint16_t* dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
int i;
@@ -694,9 +694,9 @@ void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
}
// 8x2 -> 3x1
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
int i;
@@ -719,9 +719,9 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
}
}
-void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
+void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst_ptr,
+ uint16_t* dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
int i;
@@ -744,7 +744,7 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
}
}
-void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
+void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
for (x = 0; x < src_width - 1; x += 2) {
@@ -758,7 +758,9 @@ void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
}
}
-void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
+void ScaleAddRow_16_C(const uint16_t* src_ptr,
+ uint32_t* dst_ptr,
+ int src_width) {
int x;
assert(src_width > 0);
for (x = 0; x < src_width - 1; x += 2) {
@@ -772,12 +774,12 @@ void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
}
}
-void ScaleARGBRowDown2_C(const uint8* src_argb,
+void ScaleARGBRowDown2_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
int x;
(void)src_stride;
for (x = 0; x < dst_width - 1; x += 2) {
@@ -791,9 +793,9 @@ void ScaleARGBRowDown2_C(const uint8* src_argb,
}
}
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
int x;
(void)src_stride;
@@ -807,9 +809,9 @@ void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
}
}
-void ScaleARGBRowDown2Box_C(const uint8* src_argb,
+void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
int x;
for (x = 0; x < dst_width; ++x) {
@@ -830,13 +832,13 @@ void ScaleARGBRowDown2Box_C(const uint8* src_argb,
}
}
-void ScaleARGBRowDownEven_C(const uint8* src_argb,
+void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
(void)src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
@@ -850,10 +852,10 @@ void ScaleARGBRowDownEven_C(const uint8* src_argb,
}
}
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
int x;
for (x = 0; x < dst_width; ++x) {
@@ -875,13 +877,13 @@ void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
}
// Scales a single row of pixels using point sampling.
-void ScaleARGBCols_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
dst[0] = src[x >> 16];
@@ -895,14 +897,14 @@ void ScaleARGBCols_C(uint8* dst_argb,
}
}
-void ScaleARGBCols64_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x32,
int dx) {
- int64 x = (int64)(x32);
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ int64_t x = (int64_t)(x32);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
dst[0] = src[x >> 16];
@@ -917,13 +919,13 @@ void ScaleARGBCols64_C(uint8* dst_argb,
}
// Scales a single row of pixels up by 2x using point sampling.
-void ScaleARGBColsUp2_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBColsUp2_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
int j;
(void)x;
(void)dx;
@@ -941,24 +943,24 @@ void ScaleARGBColsUp2_C(uint8* dst_argb,
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
#define BLENDERC(a, b, f, s) \
- (uint32)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
+ (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) \
BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
BLENDERC(a, b, f, 0)
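Each BLENDERC extracts one 8-bit channel, blends it with weights (0x7f ^ f) and f that sum to 127, and shifts it back into place; ORing the four channels rebuilds the packed pixel. Because the weights sum to 127 rather than 128, a fully saturated channel blends to 0xfd, matching the SSSE3 path this mimics. Scalar sketch:

#include <assert.h>
#include <stdint.h>
static uint32_t BlendChannel(uint32_t a, uint32_t b, int f, int s) {
  uint32_t ca = (a >> s) & 255, cb = (b >> s) & 255;
  return ((ca * (0x7f ^ f) + cb * f) >> 7) << s;
}
int main(void) {
  uint32_t a = 0xff000000u, b = 0xffffffffu;  // opaque black -> opaque white
  int f = 0x40;                               // ~0.5 as a 7-bit fraction
  uint32_t out = BlendChannel(a, b, f, 24) | BlendChannel(a, b, f, 16) |
                 BlendChannel(a, b, f, 8) | BlendChannel(a, b, f, 0);
  assert(out == 0xfd7f7f7fu);  // alpha 0xff -> 0xfd: weights sum to 127
  return 0;
}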
-void ScaleARGBFilterCols_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
int xi = x >> 16;
int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
dst[0] = BLENDER(a, b, xf);
x += dx;
xi = x >> 16;
@@ -972,26 +974,26 @@ void ScaleARGBFilterCols_C(uint8* dst_argb,
if (dst_width & 1) {
int xi = x >> 16;
int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
dst[0] = BLENDER(a, b, xf);
}
}
-void ScaleARGBFilterCols64_C(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x32,
int dx) {
- int64 x = (int64)(x32);
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ int64_t x = (int64_t)(x32);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
+ int64_t xi = x >> 16;
int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
dst[0] = BLENDER(a, b, xf);
x += dx;
xi = x >> 16;
@@ -1003,10 +1005,10 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb,
dst += 2;
}
if (dst_width & 1) {
- int64 xi = x >> 16;
+ int64_t xi = x >> 16;
int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
dst[0] = BLENDER(a, b, xf);
}
}
@@ -1020,8 +1022,8 @@ void ScalePlaneVertical(int src_height,
int dst_height,
int src_stride,
int dst_stride,
- const uint8* src_argb,
- uint8* dst_argb,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
int x,
int y,
int dy,
@@ -1029,7 +1031,7 @@ void ScalePlaneVertical(int src_height,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher bpp.
int dst_width_bytes = dst_width * bpp;
- void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
@@ -1090,8 +1092,8 @@ void ScalePlaneVertical_16(int src_height,
int dst_height,
int src_stride,
int dst_stride,
- const uint16* src_argb,
- uint16* dst_argb,
+ const uint16_t* src_argb,
+ uint16_t* dst_argb,
int x,
int y,
int dy,
@@ -1099,7 +1101,7 @@ void ScalePlaneVertical_16(int src_height,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
- void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb,
+ void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
@@ -1202,12 +1204,12 @@ enum FilterMode ScaleFilterReduce(int src_width,
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div) {
- return (int)(((int64)(num) << 16) / div);
+ return (int)(((int64_t)(num) << 16) / div);
}
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div) {
- return (int)((((int64)(num) << 16) - 0x00010001) / (div - 1));
+ return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
}
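FixedDiv produces the 16.16 step used throughout the scaler; FixedDiv1 adjusts numerator and denominator so the first and last samples map onto each other. Quick check of the plain form:

#include <assert.h>
#include <stdint.h>
static int FixedDiv(int num, int div) {
  return (int)(((int64_t)(num) << 16) / div);
}
int main(void) {
  assert(FixedDiv(1, 2) == 0x8000);       // 0.5
  assert(FixedDiv(640, 480) == 0x15555);  // ~1.3333 src pixels per dst pixel
  return 0;
}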
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
@@ -1288,18 +1290,18 @@ void ScaleSlope(int src_width,
// Read 8x2 pixels, upsample with filtering, and write 16x1.
// Actually reads one extra pixel, so 9x2.
-void ScaleRowUp2_16_C(const uint16* src_ptr,
+void ScaleRowUp2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
- const uint16* src2 = src_ptr + src_stride;
+ const uint16_t* src2 = src_ptr + src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
- uint16 p0 = src_ptr[0];
- uint16 p1 = src_ptr[1];
- uint16 p2 = src2[0];
- uint16 p3 = src2[1];
+ uint16_t p0 = src_ptr[0];
+ uint16_t p1 = src_ptr[1];
+ uint16_t p2 = src2[0];
+ uint16_t p3 = src2[1];
dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;
++src_ptr;
@@ -1307,10 +1309,10 @@ void ScaleRowUp2_16_C(const uint16* src_ptr,
dst += 2;
}
if (dst_width & 1) {
- uint16 p0 = src_ptr[0];
- uint16 p1 = src_ptr[1];
- uint16 p2 = src2[0];
- uint16 p3 = src2[1];
+ uint16_t p0 = src_ptr[0];
+ uint16_t p1 = src_ptr[1];
+ uint16_t p2 = src2[0];
+ uint16_t p3 = src2[1];
dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
}
}
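The 9/3/3/1 weights are the separable bilinear taps for a sample sitting 1/4 of the way from the nearer source pixel in each direction ((3/4)(3/4), (3/4)(1/4), (1/4)(3/4), (1/4)(1/4), scaled by 16), with +8 as the rounding term. Quick check:

#include <assert.h>
#include <stdint.h>
int main(void) {
  uint16_t p0 = 100, p1 = 200, p2 = 100, p3 = 200;  // both rows identical
  // Horizontal position 1/4 of the way from 100 toward 200 -> 125.
  assert(((p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4) == 125);
  return 0;
}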
diff --git a/chromium/third_party/libyuv/source/scale_gcc.cc b/chromium/third_party/libyuv/source/scale_gcc.cc
index 336eb2dba44..312236d2df8 100644
--- a/chromium/third_party/libyuv/source/scale_gcc.cc
+++ b/chromium/third_party/libyuv/source/scale_gcc.cc
@@ -93,391 +93,386 @@ static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3,
// Generated using gcc disassembly on Visual C object file:
// objdump -D yuvscaler.obj >yuvscaler.txt
-void ScaleRowDown2_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
+ asm volatile(
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
}
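The rest of this file follows the same pattern as the function above: the NaCl indirection macros (MEMACCESS, MEMLEA, MEMOPREG) are expanded to the plain AT&T operands they produced outside NaCl builds, and the NACL_R14 register disappears from the clobber lists, consistent with Native Client support being removed. On one load, illustratively:

  before: "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"  // via NaCl macro
  after:  "movdqu 0x10(%0),%%xmm1 \n"                // direct addressing

The generated machine code should be unchanged outside NaCl builds.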
-void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrlw $0xf,%%xmm4 \n"
- "packuswb %%xmm4,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pavgw %%xmm5,%%xmm0 \n"
- "pavgw %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
- );
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pavgw %%xmm5,%%xmm0 \n"
+ "pavgw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm4", "xmm5");
}
-void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrlw $0xf,%%xmm4 \n"
- "packuswb %%xmm4,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- "psrlw $0x1,%%xmm0 \n"
- "psrlw $0x1,%%xmm1 \n"
- "pavgw %%xmm5,%%xmm0 \n"
- "pavgw %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "psrlw $0x1,%%xmm0 \n"
+ "psrlw $0x1,%%xmm1 \n"
+ "pavgw %%xmm5,%%xmm0 \n"
+ "pavgw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#ifdef HAS_SCALEROWDOWN2_AVX2
-void ScaleRowDown2_AVX2(const uint8* src_ptr,
+void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
- asm volatile (
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
}
-void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
+void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
- asm volatile (
- "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
- "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
- "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
- "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
-
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
- "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
- );
+ asm volatile(
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm4", "xmm5");
}
-void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
+void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
- asm volatile (
- "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
- "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
- "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
- "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
-
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpsrlw $0x1,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x1,%%ymm1,%%ymm1 \n"
- "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
- "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+ asm volatile(
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x1,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x1,%%ymm1,%%ymm1 \n"
+ "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_SCALEROWDOWN2_AVX2
-void ScaleRowDown4_SSSE3(const uint8* src_ptr,
+void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x18,%%xmm5 \n"
- "pslld $0x10,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm5"
- );
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrld $0x18,%%xmm5 \n"
+ "pslld $0x10,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm5");
}
-void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
intptr_t stridex3;
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrlw $0xf,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "packuswb %%xmm4,%%xmm4 \n"
- "psllw $0x3,%%xmm5 \n"
- "lea " MEMLEA4(0x00,4,4,2) ",%3 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm5,%%xmm0 \n"
- "psrlw $0x4,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "=&r"(stridex3) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "psllw $0x3,%%xmm5 \n"
+ "lea 0x00(%4,%4,2),%3 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "movdqu 0x00(%0,%4,2),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,2),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "psrlw $0x4,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "=&r"(stridex3) // %3
+ : "r"((intptr_t)(src_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#ifdef HAS_SCALEROWDOWN4_AVX2
-void ScaleRowDown4_AVX2(const uint8* src_ptr,
+void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrld $0x18,%%ymm5,%%ymm5 \n"
- "vpslld $0x10,%%ymm5,%%ymm5 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vmovdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm5"
- );
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrld $0x18,%%ymm5,%%ymm5 \n"
+ "vpslld $0x10,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm5");
}
-void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
+void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
- asm volatile (
- "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
- "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
- "vpsllw $0x3,%%ymm4,%%ymm5 \n"
- "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
-
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
- "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
- "vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x4,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vmovdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "r"((intptr_t)(src_stride * 3)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ asm volatile(
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpsllw $0x3,%%ymm4,%%ymm5 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vmovdqu 0x00(%0,%3,2),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,2),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vmovdqu 0x00(%0,%4,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%4,1),%%ymm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x4,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "r"((intptr_t)(src_stride * 3)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_SCALEROWDOWN4_AVX2
-void ScaleRowDown34_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -489,33 +484,34 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr,
"m"(kShuf1), // %1
"m"(kShuf2) // %2
);
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm2 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "palignr $0x8,%%xmm0,%%xmm1 \n"
- "pshufb %%xmm3,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "movq %%xmm1," MEMACCESS2(0x8,1) " \n"
- "movq %%xmm2," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm2 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "palignr $0x8,%%xmm0,%%xmm1 \n"
+ "pshufb %%xmm3,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x8(%1) \n"
+ "movq %%xmm2,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x18,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
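
ScaleRowDown34_SSSE3 above point-samples 32 source bytes into 24 output bytes per iteration using the three shuffle masks loaded into xmm3..xmm5. A scalar sketch of 3/4 point sampling, assuming the masks keep pixels 0, 1 and 3 of every 4 (the selection used by libyuv's C reference, as far as we can tell):

#include <stdint.h>

// 3/4 point sampling: keep pixels 0, 1 and 3 of every 4 (assumed selection).
static void ScaleRowDown34_Sketch(const uint8_t* src_ptr,
                                  uint8_t* dst_ptr,
                                  int dst_width) {
  int x;
  for (x = 0; x < dst_width; x += 3) {
    dst_ptr[0] = src_ptr[0];
    dst_ptr[1] = src_ptr[1];
    dst_ptr[2] = src_ptr[3];
    dst_ptr += 3;
    src_ptr += 4;
  }
}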
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"movdqa %0,%%xmm2 \n" // kShuf01
@@ -535,53 +531,53 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
"m"(kMadd11), // %1
"m"(kRound34) // %2
);
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm0,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm4,%%xmm6 \n"
- "pmaddubsw %4,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "m"(kMadd21) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm5,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,(%1) \n"
+ "movdqu 0x8(%0),%%xmm6 \n"
+ "movdqu 0x8(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm3,%%xmm6 \n"
+ "pmaddubsw %%xmm0,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x8(%1) \n"
+ "movdqu 0x10(%0),%%xmm6 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm7 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm4,%%xmm6 \n"
+ "pmaddubsw %4,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x18,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "m"(kMadd21) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"movdqa %0,%%xmm2 \n" // kShuf01
@@ -602,88 +598,87 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
"m"(kRound34) // %2
);
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3,1),%%xmm7
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm0,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm4,%%xmm6 \n"
- "pmaddubsw %4,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "m"(kMadd21) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm5,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,(%1) \n"
+ "movdqu 0x8(%0),%%xmm6 \n"
+ "movdqu 0x8(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm3,%%xmm6 \n"
+ "pmaddubsw %%xmm0,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x8(%1) \n"
+ "movdqu 0x10(%0),%%xmm6 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm7 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm4,%%xmm6 \n"
+ "pmaddubsw %4,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x18,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "m"(kMadd21) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
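
The _1_Box variant above averages its two source rows equally with a single pavgb, while the _0_Box variant applies pavgb twice to weight the near row roughly 3:1. Since pavgb computes (a + b + 1) >> 1, the double application works out as follows (a sketch of the arithmetic, not library code):

#include <stdint.h>

// pavgb(s, pavgb(t, s)) ~= (3*s + t) / 4, with a small upward rounding bias.
static inline uint8_t Avg31_Sketch(uint8_t s, uint8_t t) {
  uint8_t st = (uint8_t)((s + t + 1) >> 1);  // first pavgb: equal weights
  return (uint8_t)((s + st + 1) >> 1);       // second pavgb leans toward s
}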
-void ScaleRowDown38_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "movhlps %%xmm0,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS2(0x8,1) " \n"
- "lea " MEMLEA(0xc,1) ",%1 \n"
- "sub $0xc,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "m"(kShuf38a), // %3
- "m"(kShuf38b) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
- );
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "movhlps %%xmm0,%%xmm1 \n"
+ "movd %%xmm1,0x8(%1) \n"
+ "lea 0xc(%1),%1 \n"
+ "sub $0xc,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "m"(kShuf38a), // %3
+ "m"(kShuf38b) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5");
}
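
ScaleRowDown38_SSSE3 point-samples 32 source bytes down to 12 outputs per iteration (3/8 scale) via two shuffles and a saturating add. A scalar sketch, assuming the shuffle tables select pixels 0, 3 and 6 from every 8:

#include <stdint.h>

// 3/8 point sampling: keep pixels 0, 3 and 6 of every 8 (assumed selection).
static void ScaleRowDown38_Sketch(const uint8_t* src_ptr,
                                  uint8_t* dst_ptr,
                                  int dst_width) {
  int x;
  for (x = 0; x < dst_width; x += 3) {
    dst_ptr[0] = src_ptr[0];
    dst_ptr[1] = src_ptr[3];
    dst_ptr[2] = src_ptr[6];
    dst_ptr += 3;
    src_ptr += 8;
  }
}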
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"movdqa %0,%%xmm2 \n"
@@ -696,40 +691,39 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
"m"(kShufAb2), // %2
"m"(kScaleAb2) // %3
);
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pshufb %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "paddusw %%xmm6,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS(1) " \n"
- "psrlq $0x10,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS2(0x2,1) " \n"
- "lea " MEMLEA(0x6,1) ",%1 \n"
- "sub $0x6,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "pavgb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pshufb %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "pshufb %%xmm3,%%xmm6 \n"
+ "paddusw %%xmm6,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "paddusw %%xmm0,%%xmm1 \n"
+ "pmulhuw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movd %%xmm1,(%1) \n"
+ "psrlq $0x10,%%xmm1 \n"
+ "movd %%xmm1,0x2(%1) \n"
+ "lea 0x6(%1),%1 \n"
+ "sub $0x6,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
+void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"movdqa %0,%%xmm2 \n"
@@ -741,112 +735,117 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
"m"(kShufAc3), // %1
"m"(kScaleAc33) // %2
);
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm6) // movdqu (%0,%3,1),%%xmm6
- "movhlps %%xmm0,%%xmm1 \n"
- "movhlps %%xmm6,%%xmm7 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm6 \n"
- "punpcklbw %%xmm5,%%xmm7 \n"
- "paddusw %%xmm6,%%xmm0 \n"
- "paddusw %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movhlps %%xmm6,%%xmm7 \n"
- "punpcklbw %%xmm5,%%xmm6 \n"
- "punpcklbw %%xmm5,%%xmm7 \n"
- "paddusw %%xmm6,%%xmm0 \n"
- "paddusw %%xmm7,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "psrldq $0x2,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm6 \n"
- "psrldq $0x2,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "movdqa %%xmm1,%%xmm7 \n"
- "psrldq $0x2,%%xmm1 \n"
- "paddusw %%xmm1,%%xmm7 \n"
- "psrldq $0x2,%%xmm1 \n"
- "paddusw %%xmm1,%%xmm7 \n"
- "pshufb %%xmm3,%%xmm7 \n"
- "paddusw %%xmm7,%%xmm6 \n"
- "pmulhuw %%xmm4,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movd %%xmm6," MEMACCESS(1) " \n"
- "psrlq $0x10,%%xmm6 \n"
- "movd %%xmm6," MEMACCESS2(0x2,1) " \n"
- "lea " MEMLEA(0x6,1) ",%1 \n"
- "sub $0x6,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm6 \n"
+ "movhlps %%xmm0,%%xmm1 \n"
+ "movhlps %%xmm6,%%xmm7 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm6 \n"
+ "punpcklbw %%xmm5,%%xmm7 \n"
+ "paddusw %%xmm6,%%xmm0 \n"
+ "paddusw %%xmm7,%%xmm1 \n"
+ "movdqu 0x00(%0,%3,2),%%xmm6 \n"
+ "lea 0x10(%0),%0 \n"
+ "movhlps %%xmm6,%%xmm7 \n"
+ "punpcklbw %%xmm5,%%xmm6 \n"
+ "punpcklbw %%xmm5,%%xmm7 \n"
+ "paddusw %%xmm6,%%xmm0 \n"
+ "paddusw %%xmm7,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "psrldq $0x2,%%xmm0 \n"
+ "paddusw %%xmm0,%%xmm6 \n"
+ "psrldq $0x2,%%xmm0 \n"
+ "paddusw %%xmm0,%%xmm6 \n"
+ "pshufb %%xmm2,%%xmm6 \n"
+ "movdqa %%xmm1,%%xmm7 \n"
+ "psrldq $0x2,%%xmm1 \n"
+ "paddusw %%xmm1,%%xmm7 \n"
+ "psrldq $0x2,%%xmm1 \n"
+ "paddusw %%xmm1,%%xmm7 \n"
+ "pshufb %%xmm3,%%xmm7 \n"
+ "paddusw %%xmm7,%%xmm6 \n"
+ "pmulhuw %%xmm4,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movd %%xmm6,(%1) \n"
+ "psrlq $0x10,%%xmm6 \n"
+ "movd %%xmm6,0x2(%1) \n"
+ "lea 0x6(%1),%1 \n"
+ "sub $0x6,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
// Reads 16xN bytes and produces 16 shorts at a time.
-void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
- asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n"
- "movdqa %%xmm3,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpckhbw %%xmm5,%%xmm3 \n"
- "paddusw %%xmm2,%%xmm0 \n"
- "paddusw %%xmm3,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(src_width) // %2
- :
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+void ScaleAddRow_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width) {
+ asm volatile(
+
+ "pxor %%xmm5,%%xmm5 \n"
+
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm3 \n"
+ "lea 0x10(%0),%0 \n" // src_ptr += 16
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu 0x10(%1),%%xmm1 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpckhbw %%xmm5,%%xmm3 \n"
+ "paddusw %%xmm2,%%xmm0 \n"
+ "paddusw %%xmm3,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(src_width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
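
ScaleAddRow accumulates one row of bytes into a row of 16-bit sums; the box filter calls it once per source row before dividing by the row count. A scalar sketch — note the SSE2 loop above uses paddusw, so its sums saturate at 65535 rather than wrapping:

#include <stdint.h>

// Accumulate one row of bytes into 16-bit sums (saturation omitted here).
static void ScaleAddRow_Sketch(const uint8_t* src_ptr,
                               uint16_t* dst_ptr,
                               int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] = (uint16_t)(dst_ptr[x] + src_ptr[x]);
  }
}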
#ifdef HAS_SCALEADDROW_AVX2
// Reads 32 bytes and accumulates to 32 shorts at a time.
-void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
- asm volatile (
- "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
-
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm3 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32
- "vpermq $0xd8,%%ymm3,%%ymm3 \n"
- "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
- "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
- "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n"
- "vpaddusw " MEMACCESS2(0x20,1) ",%%ymm3,%%ymm1 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(src_width) // %2
- :
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
+void ScaleAddRow_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width) {
+ asm volatile(
+
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm3 \n"
+ "lea 0x20(%0),%0 \n" // src_ptr += 32
+ "vpermq $0xd8,%%ymm3,%%ymm3 \n"
+ "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
+ "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpaddusw (%1),%%ymm2,%%ymm0 \n"
+ "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(src_width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
}
#endif // HAS_SCALEADDROW_AVX2
@@ -860,394 +859,393 @@ static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040,
0x4040, 0x4040, 0x4040, 0x4040};
// Bilinear column filtering. SSSE3 version.
-void ScaleFilterCols_SSSE3(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
intptr_t x0, x1, temp_pixel;
- asm volatile (
- "movd %6,%%xmm2 \n"
- "movd %7,%%xmm3 \n"
- "movl $0x04040000,%k2 \n"
- "movd %k2,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x9,%%xmm6 \n" // 0x007f007f
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $15,%%xmm7 \n" // 0x00010001
-
- "pextrw $0x1,%%xmm2,%k3 \n"
- "subl $0x2,%5 \n"
- "jl 29f \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "punpckldq %%xmm0,%%xmm2 \n"
- "punpckldq %%xmm3,%%xmm3 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
-
- LABELALIGN
- "2: \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
- "movd %k2,%%xmm0 \n"
- "psrlw $0x9,%%xmm1 \n"
- MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2
- "movd %k2,%%xmm4 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "punpcklwd %%xmm4,%%xmm0 \n"
- "psubb %8,%%xmm0 \n" // make pixels signed.
- "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) + 1
- "paddusb %%xmm7,%%xmm1 \n"
- "pmaddubsw %%xmm0,%%xmm1 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
- "paddw %9,%%xmm1 \n" // make pixels unsigned.
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movd %%xmm1,%k2 \n"
- "mov %w2," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x2,0) ",%0 \n"
- "subl $0x2,%5 \n"
- "jge 2b \n"
-
- LABELALIGN
- "29: \n"
- "addl $0x1,%5 \n"
- "jl 99f \n"
- MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
- "movd %k2,%%xmm0 \n"
- "psrlw $0x9,%%xmm2 \n"
- "pshufb %%xmm5,%%xmm2 \n"
- "psubb %8,%%xmm0 \n" // make pixels signed.
- "pxor %%xmm6,%%xmm2 \n"
- "paddusb %%xmm7,%%xmm2 \n"
- "pmaddubsw %%xmm0,%%xmm2 \n"
- "paddw %9,%%xmm2 \n" // make pixels unsigned.
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm2 \n"
- "movd %%xmm2,%k2 \n"
- "mov %b2," MEMACCESS(0) " \n"
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "=&a"(temp_pixel), // %2
- "=&r"(x0), // %3
- "=&r"(x1), // %4
+ asm volatile(
+ "movd %6,%%xmm2 \n"
+ "movd %7,%%xmm3 \n"
+ "movl $0x04040000,%k2 \n"
+ "movd %k2,%%xmm5 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrlw $0x9,%%xmm6 \n" // 0x007f007f
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psrlw $15,%%xmm7 \n" // 0x00010001
+
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "subl $0x2,%5 \n"
+ "jl 29f \n"
+ "movdqa %%xmm2,%%xmm0 \n"
+ "paddd %%xmm3,%%xmm0 \n"
+ "punpckldq %%xmm0,%%xmm2 \n"
+ "punpckldq %%xmm3,%%xmm3 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+
+ LABELALIGN
+ "2: \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "paddd %%xmm3,%%xmm2 \n"
+ "movzwl 0x00(%1,%3,1),%k2 \n"
+ "movd %k2,%%xmm0 \n"
+ "psrlw $0x9,%%xmm1 \n"
+ "movzwl 0x00(%1,%4,1),%k2 \n"
+ "movd %k2,%%xmm4 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "punpcklwd %%xmm4,%%xmm0 \n"
+ "psubb %8,%%xmm0 \n" // make pixels signed.
+      "pxor      %%xmm6,%%xmm1                   \n"  // 128 - f = (f ^ 127) + 1
+ "paddusb %%xmm7,%%xmm1 \n"
+ "pmaddubsw %%xmm0,%%xmm1 \n"
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+ "paddw %9,%%xmm1 \n" // make pixels unsigned.
+ "psrlw $0x7,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movd %%xmm1,%k2 \n"
+ "mov %w2,(%0) \n"
+ "lea 0x2(%0),%0 \n"
+ "subl $0x2,%5 \n"
+ "jge 2b \n"
+
+ LABELALIGN
+ "29: \n"
+ "addl $0x1,%5 \n"
+ "jl 99f \n"
+ "movzwl 0x00(%1,%3,1),%k2 \n"
+ "movd %k2,%%xmm0 \n"
+ "psrlw $0x9,%%xmm2 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "psubb %8,%%xmm0 \n" // make pixels signed.
+ "pxor %%xmm6,%%xmm2 \n"
+ "paddusb %%xmm7,%%xmm2 \n"
+ "pmaddubsw %%xmm0,%%xmm2 \n"
+ "paddw %9,%%xmm2 \n" // make pixels unsigned.
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm2 \n"
+ "movd %%xmm2,%k2 \n"
+ "mov %b2,(%0) \n"
+ "99: \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "=&a"(temp_pixel), // %2
+ "=&r"(x0), // %3
+ "=&r"(x1), // %4
#if defined(__x86_64__)
- "+rm"(dst_width) // %5
+ "+rm"(dst_width) // %5
#else
- "+m"(dst_width) // %5
+ "+m"(dst_width) // %5
#endif
- : "rm"(x), // %6
- "rm"(dx), // %7
+ : "rm"(x), // %6
+ "rm"(dx), // %7
#if defined(__x86_64__)
- "x"(kFsub80), // %8
- "x"(kFadd40) // %9
+ "x"(kFsub80), // %8
+ "x"(kFadd40) // %9
#else
- "m"(kFsub80), // %8
- "m"(kFadd40) // %9
+ "m"(kFsub80), // %8
+ "m"(kFadd40) // %9
#endif
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
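
Reading the constants above, the filter keeps the top 7 bits of the 16.16 fraction, biases pixels by kFsub80 so pmaddubsw can treat them as signed, and un-biases with kFadd40 (0x4040 = 128*128 + 64) before the final shift. A scalar sketch of one output pixel, under that reading of the code:

#include <stdint.h>

// One output pixel of the SSSE3 bilinear column filter, assuming the
// 7-bit fraction and the kFsub80/kFadd40 bias trick are read correctly.
static inline uint8_t FilterCol_Sketch(const uint8_t* src_ptr, int x) {
  int xi = x >> 16;          // integer source position
  int f7 = (x >> 9) & 0x7f;  // top 7 bits of the fraction
  int a = src_ptr[xi];
  int b = src_ptr[xi + 1];
  return (uint8_t)((a * (128 - f7) + b * f7 + 64) >> 7);
}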
// Reads 4 pixels, duplicates them and writes 8 pixels.
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-void ScaleColsUp2_SSE2(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
(void)x;
(void)dx;
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(0) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
-
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "movdqu %%xmm1,0x10(%0) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
}
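
ScaleColsUp2 is the trivial 2x horizontal upsample: punpcklbw/punpckhbw interleave each byte with itself, so every source pixel is written twice. Scalar equivalent (helper name ours):

#include <stdint.h>

static void ScaleColsUp2_Sketch(uint8_t* dst_ptr,
                                const uint8_t* src_ptr,
                                int dst_width) {
  int x;
  for (x = 0; x < dst_width; x += 2) {
    dst_ptr[x] = dst_ptr[x + 1] = src_ptr[x / 2];  // duplicate each pixel
  }
}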
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
(void)src_stride;
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "shufps $0xdd,%%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
}
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
(void)src_stride;
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm2 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
}
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
+void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm2 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
}
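
The 2x2 ARGB box above averages the two rows with pavgb, separates even and odd pixels with the two shufps masks, then averages horizontally with a second pavgb. Per channel that amounts to the following (a sketch of the rounding behaviour, not library code):

#include <stdint.h>

// pavgb is (a + b + 1) >> 1, so two passes can differ from the exact
// (a + b + c + d + 2) >> 2 average by a small upward rounding bias.
static inline uint8_t Avg2x2_Pavgb(uint8_t tl, uint8_t tr,
                                   uint8_t bl, uint8_t br) {
  uint8_t l = (uint8_t)((tl + bl + 1) >> 1);  // vertical pass (rows)
  uint8_t r = (uint8_t)((tr + br + 1) >> 1);
  return (uint8_t)((l + r + 1) >> 1);         // horizontal pass (columns)
}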
// Reads 4 pixels at a time.
// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
+void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
intptr_t src_stepx_x12;
(void)src_stride;
- asm volatile (
- "lea " MEMLEA3(0x00,1,4) ",%1 \n"
- "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
- LABELALIGN
- "1: \n"
- "movd " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
- "punpckldq %%xmm1,%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
- MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
- "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
- "punpckldq %%xmm3,%%xmm2 \n"
- "punpcklqdq %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stepx_x4), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width), // %3
- "=&r"(src_stepx_x12) // %4
- :: "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
+ asm volatile(
+ "lea 0x00(,%1,4),%1 \n"
+ "lea 0x00(%1,%1,2),%4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movd (%0),%%xmm0 \n"
+ "movd 0x00(%0,%1,1),%%xmm1 \n"
+ "punpckldq %%xmm1,%%xmm0 \n"
+ "movd 0x00(%0,%1,2),%%xmm2 \n"
+ "movd 0x00(%0,%4,1),%%xmm3 \n"
+ "lea 0x00(%0,%1,4),%0 \n"
+ "punpckldq %%xmm3,%%xmm2 \n"
+ "punpcklqdq %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stepx_x4), // %1
+ "+r"(dst_argb), // %2
+ "+r"(dst_width), // %3
+ "=&r"(src_stepx_x12) // %4
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3");
}
// Blends four 2x2 to 4x1.
// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
intptr_t src_stepx_x12;
intptr_t row1 = (intptr_t)(src_stride);
- asm volatile (
- "lea " MEMLEA3(0x00,1,4) ",%1 \n"
- "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
- "lea " MEMLEA4(0x00,0,5,1) ",%5 \n"
-
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
- MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
- MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
- "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
- "movq " MEMACCESS(5) ",%%xmm2 \n"
- MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
- MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
- MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
- "lea " MEMLEA4(0x00,5,1,4) ",%5 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stepx_x4), // %1
- "+r"(dst_argb), // %2
- "+rm"(dst_width), // %3
- "=&r"(src_stepx_x12), // %4
- "+r"(row1) // %5
- :: "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
+ asm volatile(
+ "lea 0x00(,%1,4),%1 \n"
+ "lea 0x00(%1,%1,2),%4 \n"
+ "lea 0x00(%0,%5,1),%5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movhps 0x00(%0,%1,1),%%xmm0 \n"
+ "movq 0x00(%0,%1,2),%%xmm1 \n"
+ "movhps 0x00(%0,%4,1),%%xmm1 \n"
+ "lea 0x00(%0,%1,4),%0 \n"
+ "movq (%5),%%xmm2 \n"
+ "movhps 0x00(%5,%1,1),%%xmm2 \n"
+ "movq 0x00(%5,%1,2),%%xmm3 \n"
+ "movhps 0x00(%5,%4,1),%%xmm3 \n"
+ "lea 0x00(%5,%1,4),%5 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm2 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stepx_x4), // %1
+ "+r"(dst_argb), // %2
+ "+rm"(dst_width), // %3
+ "=&r"(src_stepx_x12), // %4
+ "+r"(row1) // %5
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3");
}
-void ScaleARGBCols_SSE2(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
intptr_t x0, x1;
- asm volatile (
- "movd %5,%%xmm2 \n"
- "movd %6,%%xmm3 \n"
- "pshufd $0x0,%%xmm2,%%xmm2 \n"
- "pshufd $0x11,%%xmm3,%%xmm0 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pshufd $0x5,%%xmm3,%%xmm0 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "pextrw $0x1,%%xmm2,%k0 \n"
- "pextrw $0x3,%%xmm2,%k1 \n"
- "cmp $0x0,%4 \n"
- "jl 99f \n"
- "sub $0x4,%4 \n"
- "jl 49f \n"
-
- LABELALIGN
- "40: \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
- "pextrw $0x5,%%xmm2,%k0 \n"
- "pextrw $0x7,%%xmm2,%k1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1
- MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4
- "pextrw $0x1,%%xmm2,%k0 \n"
- "pextrw $0x3,%%xmm2,%k1 \n"
- "punpckldq %%xmm4,%%xmm1 \n"
- "punpcklqdq %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%4 \n"
- "jge 40b \n"
-
- "49: \n"
- "test $0x2,%4 \n"
- "je 29f \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
- "pextrw $0x5,%%xmm2,%k0 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x8,2) ",%2 \n"
- "29: \n"
- "test $0x1,%4 \n"
- "je 99f \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- "movd %%xmm0," MEMACCESS(2) " \n"
- "99: \n"
- : "=&a"(x0), // %0
- "=&d"(x1), // %1
- "+r"(dst_argb), // %2
- "+r"(src_argb), // %3
- "+r"(dst_width) // %4
- : "rm"(x), // %5
- "rm"(dx) // %6
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
+ asm volatile(
+ "movd %5,%%xmm2 \n"
+ "movd %6,%%xmm3 \n"
+ "pshufd $0x0,%%xmm2,%%xmm2 \n"
+ "pshufd $0x11,%%xmm3,%%xmm0 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pshufd $0x5,%%xmm3,%%xmm0 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pextrw $0x1,%%xmm2,%k0 \n"
+ "pextrw $0x3,%%xmm2,%k1 \n"
+ "cmp $0x0,%4 \n"
+ "jl 99f \n"
+ "sub $0x4,%4 \n"
+ "jl 49f \n"
+
+ LABELALIGN
+ "40: \n"
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd 0x00(%3,%1,4),%%xmm1 \n"
+ "pextrw $0x5,%%xmm2,%k0 \n"
+ "pextrw $0x7,%%xmm2,%k1 \n"
+ "paddd %%xmm3,%%xmm2 \n"
+ "punpckldq %%xmm1,%%xmm0 \n"
+ "movd 0x00(%3,%0,4),%%xmm1 \n"
+ "movd 0x00(%3,%1,4),%%xmm4 \n"
+ "pextrw $0x1,%%xmm2,%k0 \n"
+ "pextrw $0x3,%%xmm2,%k1 \n"
+ "punpckldq %%xmm4,%%xmm1 \n"
+ "punpcklqdq %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%4 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "test $0x2,%4 \n"
+ "je 29f \n"
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd 0x00(%3,%1,4),%%xmm1 \n"
+ "pextrw $0x5,%%xmm2,%k0 \n"
+ "punpckldq %%xmm1,%%xmm0 \n"
+ "movq %%xmm0,(%2) \n"
+ "lea 0x8(%2),%2 \n"
+ "29: \n"
+ "test $0x1,%4 \n"
+ "je 99f \n"
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "99: \n"
+ : "=&a"(x0), // %0
+ "=&d"(x1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(src_argb), // %3
+ "+r"(dst_width) // %4
+ : "rm"(x), // %5
+ "rm"(dx) // %6
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
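
ScaleARGBCols_SSE2 is nearest-neighbor column scaling: the pextrw instructions pull the integer part out of each 16.16 coordinate and movd gathers whole 4-byte pixels. A scalar sketch, assuming the usual 4-byte alignment of ARGB pixels:

#include <stdint.h>

static void ScaleARGBCols_Sketch(uint8_t* dst_argb, const uint8_t* src_argb,
                                 int dst_width, int x, int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width; ++j) {
    dst[j] = src[x >> 16];  // x and dx are 16.16 fixed point
    x += dx;
  }
}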
// Reads 4 pixels, duplicates them and writes 8 pixels.
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
(void)x;
(void)dx;
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpckldq %%xmm0,%%xmm0 \n"
- "punpckhdq %%xmm1,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(0) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
-
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", NACL_R14
- "xmm0", "xmm1"
- );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpckldq %%xmm0,%%xmm0 \n"
+ "punpckhdq %%xmm1,%%xmm1 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "movdqu %%xmm1,0x10(%0) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
}
// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
@@ -1262,8 +1260,8 @@ static const uvec8 kShuffleFractions = {
};
// Bilinear ARGB column filtering. SSSE3 version.
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
@@ -1276,67 +1274,65 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
"m"(kShuffleFractions) // %1
);
- asm volatile (
- "movd %5,%%xmm2 \n"
- "movd %6,%%xmm3 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x9,%%xmm6 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "sub $0x2,%2 \n"
- "jl 29f \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "punpckldq %%xmm0,%%xmm2 \n"
- "punpckldq %%xmm3,%%xmm3 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
-
- LABELALIGN
- "2: \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
- "psrlw $0x9,%%xmm1 \n"
- MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
- "pshufb %%xmm5,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm1 \n"
- "pmaddubsw %%xmm1,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "sub $0x2,%2 \n"
- "jge 2b \n"
-
- LABELALIGN
- "29: \n"
- "add $0x1,%2 \n"
- "jl 99f \n"
- "psrlw $0x9,%%xmm2 \n"
- MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
- "pshufb %%xmm5,%%xmm2 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm2 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(0) " \n"
-
- LABELALIGN
- "99: \n"
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+rm"(dst_width), // %2
- "=&r"(x0), // %3
- "=&r"(x1) // %4
- : "rm"(x), // %5
- "rm"(dx) // %6
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
+ asm volatile(
+ "movd %5,%%xmm2 \n"
+ "movd %6,%%xmm3 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrlw $0x9,%%xmm6 \n"
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "sub $0x2,%2 \n"
+ "jl 29f \n"
+ "movdqa %%xmm2,%%xmm0 \n"
+ "paddd %%xmm3,%%xmm0 \n"
+ "punpckldq %%xmm0,%%xmm2 \n"
+ "punpckldq %%xmm3,%%xmm3 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+
+ LABELALIGN
+ "2: \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "paddd %%xmm3,%%xmm2 \n"
+ "movq 0x00(%1,%3,4),%%xmm0 \n"
+ "psrlw $0x9,%%xmm1 \n"
+ "movhps 0x00(%1,%4,4),%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "pxor %%xmm6,%%xmm1 \n"
+ "pmaddubsw %%xmm1,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%0) \n"
+ "lea 0x8(%0),%0 \n"
+ "sub $0x2,%2 \n"
+ "jge 2b \n"
+
+ LABELALIGN
+ "29: \n"
+ "add $0x1,%2 \n"
+ "jl 99f \n"
+ "psrlw $0x9,%%xmm2 \n"
+ "movq 0x00(%1,%3,4),%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "pxor %%xmm6,%%xmm2 \n"
+ "pmaddubsw %%xmm2,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movd %%xmm0,(%0) \n"
+
+ LABELALIGN "99: \n" // clang-format error.
+
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+rm"(dst_width), // %2
+ "=&r"(x0), // %3
+ "=&r"(x1) // %4
+ : "rm"(x), // %5
+ "rm"(dx) // %6
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
// Divide num by div and return as a 16.16 fixed point result.
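
A sketch of the 16.16 division named in the comment above, assuming the usual widening approach (the function body itself falls outside this hunk):

#include <stdint.h>

// e.g. FixedDiv_Sketch(1, 2) == 0x00008000, i.e. 0.5 in 16.16.
static inline int FixedDiv_Sketch(int num, int div) {
  return (int)(((int64_t)(num) << 16) / div);
}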
diff --git a/chromium/third_party/libyuv/source/scale_msa.cc b/chromium/third_party/libyuv/source/scale_msa.cc
index df1f482be6d..482a521f0d2 100644
--- a/chromium/third_party/libyuv/source/scale_msa.cc
+++ b/chromium/third_party/libyuv/source/scale_msa.cc
@@ -127,13 +127,13 @@ void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
}
}
-void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
int x;
- const uint8* nxt_argb = src_argb + src_stride;
+ const uint8_t* nxt_argb = src_argb + src_stride;
int32_t stepx = src_stepx * 4;
int64_t data0, data1, data2, data3;
v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0};
@@ -553,8 +553,8 @@ void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
}
}
-void ScaleFilterCols_MSA(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -630,13 +630,13 @@ void ScaleFilterCols_MSA(uint8* dst_ptr,
}
}
-void ScaleARGBCols_MSA(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
int j;
v4i32 x_vec = __msa_fill_w(x);
v4i32 dx_vec = __msa_fill_w(dx);
@@ -657,12 +657,12 @@ void ScaleARGBCols_MSA(uint8* dst_argb,
}
}
-void ScaleARGBFilterCols_MSA(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
- const uint32* src = (const uint32*)(src_argb);
+ const uint32_t* src = (const uint32_t*)(src_argb);
int j;
v4u32 src0, src1, src2, src3;
v4u32 vec0, vec1, vec2, vec3;
@@ -722,9 +722,9 @@ void ScaleARGBFilterCols_MSA(uint8* dst_argb,
}
}
-void ScaleRowDown34_MSA(const uint8* src_ptr,
+void ScaleRowDown34_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
int x;
(void)src_stride;
@@ -753,12 +753,12 @@ void ScaleRowDown34_MSA(const uint8* src_ptr,
}
}
-void ScaleRowDown34_0_Box_MSA(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* d,
+ uint8_t* d,
int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
int x;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5;
@@ -847,12 +847,12 @@ void ScaleRowDown34_0_Box_MSA(const uint8* src_ptr,
}
}
-void ScaleRowDown34_1_Box_MSA(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* d,
+ uint8_t* d,
int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
int x;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5;
diff --git a/chromium/third_party/libyuv/source/scale_neon.cc b/chromium/third_party/libyuv/source/scale_neon.cc
index 46da9d5e272..459a2995dfe 100644
--- a/chromium/third_party/libyuv/source/scale_neon.cc
+++ b/chromium/third_party/libyuv/source/scale_neon.cc
@@ -23,9 +23,9 @@ extern "C" {
// Provided by Fritz Koenig
// Read 32x1, throw away even pixels, and write 16x1.
-void ScaleRowDown2_NEON(const uint8* src_ptr,
+void ScaleRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -44,9 +44,9 @@ void ScaleRowDown2_NEON(const uint8* src_ptr,
}
// Read 32x1 average down and write 16x1.
-void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
+void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -65,9 +65,9 @@ void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
}
// Read 32x2 average down and write 16x1.
-void ScaleRowDown2Box_NEON(const uint8* src_ptr,
+void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
asm volatile(
// change the stride to row 2 pointer
@@ -95,9 +95,9 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr,
);
}
-void ScaleRowDown4_NEON(const uint8* src_ptr,
+void ScaleRowDown4_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -113,13 +113,13 @@ void ScaleRowDown4_NEON(const uint8* src_ptr,
: "q0", "q1", "memory", "cc");
}
-void ScaleRowDown4Box_NEON(const uint8* src_ptr,
+void ScaleRowDown4Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride;
- const uint8* src_ptr2 = src_ptr + src_stride * 2;
- const uint8* src_ptr3 = src_ptr + src_stride * 3;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
+ const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
asm volatile(
"1: \n"
"vld1.8 {q0}, [%0]! \n" // load up 16x4
@@ -149,9 +149,9 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr,
// Down scale from 4 to 3 pixels. Use the NEON multilane read/write
// to load every 4th pixel into 4 different registers.
// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
+void ScaleRowDown34_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -168,9 +168,9 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
: "d0", "d1", "d2", "d3", "memory", "cc");
}
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"vmov.u8 d24, #3 \n"
@@ -225,9 +225,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"cc");
}
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"vmov.u8 d24, #3 \n"
@@ -276,9 +276,9 @@ static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18,
65536 / 18, 65536 / 18};
// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
+void ScaleRowDown38_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -299,11 +299,11 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
}
// 32x3 -> 12x1
-void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
+void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride * 2;
+ const uint8_t* src_ptr1 = src_ptr + src_stride * 2;
asm volatile(
"vld1.16 {q13}, [%5] \n"
@@ -411,9 +411,9 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
}
// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"vld1.16 {q13}, [%4] \n"
@@ -504,12 +504,12 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
: "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc");
}
-void ScaleAddRows_NEON(const uint8* src_ptr,
+void ScaleAddRows_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst_ptr,
+ uint16_t* dst_ptr,
int src_width,
int src_height) {
- const uint8* src_tmp;
+ const uint8_t* src_tmp;
asm volatile(
"1: \n"
"mov %0, %1 \n"
@@ -547,17 +547,17 @@ void ScaleAddRows_NEON(const uint8* src_ptr,
"vld2.8 {d6[" #n "], d7[" #n "]}, [%6] \n"
// The NEON version mimics this formula (from row_common.cc):
-// #define BLENDER(a, b, f) (uint8)((int)(a) +
+// #define BLENDER(a, b, f) (uint8_t)((int)(a) +
// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
-void ScaleFilterCols_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
- const uint8* src_tmp = src_ptr;
+ const uint8_t* src_tmp = src_ptr;
asm volatile (
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
@@ -615,8 +615,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr,
#undef LOAD2_DATA8_LANE
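
Plugging the quoted BLENDER formula into a column loop gives the full-precision scalar reference the NEON code mimics (unlike the SSSE3 version above, which truncates the fraction to 7 bits); x and dx are 16.16 fixed point. For example a = 100, b = 200, f = 0x8000 (halfway) gives 100 + ((0x8000 * 100 + 0x8000) >> 16) = 150.

#include <stdint.h>

static void ScaleFilterCols_Sketch(uint8_t* dst_ptr, const uint8_t* src_ptr,
                                   int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width; ++j) {
    int a = src_ptr[x >> 16];        // left neighbour
    int b = src_ptr[(x >> 16) + 1];  // right neighbour
    int f = x & 0xffff;              // fraction between them
    dst_ptr[j] = (uint8_t)(a + ((f * (b - a) + 0x8000) >> 16));
    x += dx;
  }
}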
// 16x2 -> 16x1
-void ScaleFilterRows_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterRows_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
@@ -699,9 +699,9 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
: "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc");
}
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -727,9 +727,9 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
// 54: f942 038d vst2.32 {d16-d19}, [r2]!
// 58: d1f5 bne.n 46 <ScaleARGBRowDown2_C+0x46>
-void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -749,9 +749,9 @@ void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
);
}
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
asm volatile(
// change the stride to row 2 pointer
@@ -786,10 +786,10 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -811,10 +811,10 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
asm volatile(
"mov r12, %4, lsl #2 \n"
@@ -857,13 +857,13 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
"add %3, %3, %4 \n" \
"vld1.32 {" #dn "[" #n "]}, [%6] \n"
-void ScaleARGBCols_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
int tmp;
- const uint8* src_tmp = src_argb;
+ const uint8_t* src_tmp = src_argb;
asm volatile(
"1: \n"
// clang-format off
@@ -900,14 +900,14 @@ void ScaleARGBCols_NEON(uint8* dst_argb,
"add %3, %3, %4 \n" \
"vld2.32 {" #dn1 "[" #n "], " #dn2 "[" #n "]}, [%6] \n"
-void ScaleARGBFilterCols_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
- const uint8* src_tmp = src_argb;
+ const uint8_t* src_tmp = src_argb;
asm volatile (
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
diff --git a/chromium/third_party/libyuv/source/scale_neon64.cc b/chromium/third_party/libyuv/source/scale_neon64.cc
index 73aed9e1b38..494a9cfbfbe 100644
--- a/chromium/third_party/libyuv/source/scale_neon64.cc
+++ b/chromium/third_party/libyuv/source/scale_neon64.cc
@@ -21,9 +21,9 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
// Read 32x1, throw away even pixels, and write 16x1.
-void ScaleRowDown2_NEON(const uint8* src_ptr,
+void ScaleRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -42,9 +42,9 @@ void ScaleRowDown2_NEON(const uint8* src_ptr,
}
// Read 32x1 average down and write 16x1.
-void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
+void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -64,9 +64,9 @@ void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
}
// Read 32x2 average down and write 16x1.
-void ScaleRowDown2Box_NEON(const uint8* src_ptr,
+void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
asm volatile(
// change the stride to row 2 pointer
@@ -92,9 +92,9 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr,
);
}
-void ScaleRowDown4_NEON(const uint8* src_ptr,
+void ScaleRowDown4_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -110,13 +110,13 @@ void ScaleRowDown4_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "memory", "cc");
}
-void ScaleRowDown4Box_NEON(const uint8* src_ptr,
+void ScaleRowDown4Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride;
- const uint8* src_ptr2 = src_ptr + src_stride * 2;
- const uint8* src_ptr3 = src_ptr + src_stride * 3;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
+ const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
asm volatile(
"1: \n"
"ld1 {v0.16b}, [%0], #16 \n" // load up 16x4
@@ -145,9 +145,9 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr,
// Down scale from 4 to 3 pixels. Use the NEON multilane read/write
// to load every 4th pixel into 4 different registers.
// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
+void ScaleRowDown34_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -164,9 +164,9 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "memory", "cc");
}
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"movi v20.8b, #3 \n"
@@ -221,9 +221,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"v19", "v20", "memory", "cc");
}
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"movi v20.8b, #3 \n"
@@ -273,9 +273,9 @@ static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18,
65536 / 18, 65536 / 18};
// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
+void ScaleRowDown38_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -295,11 +295,11 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
}
// 32x3 -> 12x1
-void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
+void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride * 2;
+ const uint8_t* src_ptr1 = src_ptr + src_stride * 2;
ptrdiff_t tmp_src_stride = src_stride;
asm volatile(
@@ -415,9 +415,9 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
}
// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
// TODO(fbarchard): use src_stride directly for clang 3.5+.
ptrdiff_t tmp_src_stride = src_stride;
@@ -515,12 +515,12 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
"v19", "v30", "v31", "memory", "cc");
}
-void ScaleAddRows_NEON(const uint8* src_ptr,
+void ScaleAddRows_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst_ptr,
+ uint16_t* dst_ptr,
int src_width,
int src_height) {
- const uint8* src_tmp;
+ const uint8_t* src_tmp;
asm volatile(
"1: \n"
"mov %0, %1 \n"
@@ -558,19 +558,19 @@ void ScaleAddRows_NEON(const uint8* src_ptr,
"ld2 {v4.b, v5.b}[" #n "], [%6] \n"
// The NEON version mimics this formula (from row_common.cc):
-// #define BLENDER(a, b, f) (uint8)((int)(a) +
+// #define BLENDER(a, b, f) (uint8_t)((int)(a) +
// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
-void ScaleFilterCols_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterCols_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
- const uint8* src_tmp = src_ptr;
- int64 x64 = (int64)x; // NOLINT
- int64 dx64 = (int64)dx; // NOLINT
+ const uint8_t* src_tmp = src_ptr;
+ int64_t x64 = (int64_t)x; // NOLINT
+ int64_t dx64 = (int64_t)dx; // NOLINT
asm volatile (
"dup v0.4s, %w3 \n" // x
"dup v1.4s, %w4 \n" // dx
@@ -628,8 +628,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr,
#undef LOAD2_DATA8_LANE
// 16x2 -> 16x1
-void ScaleFilterRows_NEON(uint8* dst_ptr,
- const uint8* src_ptr,
+void ScaleFilterRows_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction) {
@@ -713,9 +713,9 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc");
}
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -734,9 +734,9 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
);
}
-void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
+void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -757,9 +757,9 @@ void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
);
}
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
+void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst,
+ uint8_t* dst,
int dst_width) {
asm volatile(
// change the stride to row 2 pointer
@@ -792,10 +792,10 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
(void)src_stride;
asm volatile(
@@ -807,10 +807,10 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
"subs %w2, %w2, #4 \n" // 4 pixels per loop.
"st1 {v0.16b}, [%1], #16 \n"
"b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"((int64)(src_stepx * 4)) // %3
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ : "r"((int64_t)(src_stepx * 4)) // %3
: "memory", "cc", "v0");
}
@@ -818,10 +818,10 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
// Alignment requirement: src_argb 4 byte aligned.
// TODO(Yang Zhang): Might be worth another optimization pass in future.
// It could be upgraded to 8 pixels at a time to start with.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
+void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
asm volatile(
"add %1, %1, %0 \n"
@@ -851,11 +851,11 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
"subs %w3, %w3, #4 \n" // 4 pixels per loop.
"st1 {v0.16b}, [%2], #16 \n"
"b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stride), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width) // %3
- : "r"((int64)(src_stepx * 4)) // %4
+ : "+r"(src_argb), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst_argb), // %2
+ "+r"(dst_width) // %3
+ : "r"((int64_t)(src_stepx * 4)) // %4
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
}
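
The Box variant additionally averages each sampled pixel with its three neighbours. Per output pixel, a scalar sketch of the 2x2 box filter the NEON code implements:

  #include <stdint.h>
  // t and b point at two adjacent ARGB pixels on the top and bottom rows.
  static void BoxAvgARGB(const uint8_t* t, const uint8_t* b, uint8_t* dst) {
    for (int c = 0; c < 4; ++c) {  // B, G, R, A
      dst[c] = (uint8_t)((t[c] + t[4 + c] + b[c] + b[4 + c] + 2) >> 2);  // rounded
    }
  }
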
@@ -867,15 +867,15 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
"add %3, %3, %4 \n" \
"ld1 {" #vn ".s}[" #n "], [%6] \n"
-void ScaleARGBCols_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
- const uint8* src_tmp = src_argb;
- int64 x64 = (int64)x; // NOLINT
- int64 dx64 = (int64)dx; // NOLINT
- int64 tmp64;
+ const uint8_t* src_tmp = src_argb;
+ int64_t x64 = (int64_t)x; // NOLINT
+ int64_t dx64 = (int64_t)dx; // NOLINT
+ int64_t tmp64;
asm volatile(
"1: \n"
// clang-format off
@@ -912,16 +912,16 @@ void ScaleARGBCols_NEON(uint8* dst_argb,
"add %3, %3, %4 \n" \
"ld2 {" #vn1 ".s, " #vn2 ".s}[" #n "], [%6] \n"
-void ScaleARGBFilterCols_NEON(uint8* dst_argb,
- const uint8* src_argb,
+void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
- const uint8* src_tmp = src_argb;
- int64 x64 = (int64)x; // NOLINT
- int64 dx64 = (int64)dx; // NOLINT
+ const uint8_t* src_tmp = src_argb;
+ int64_t x64 = (int64_t)x; // NOLINT
+ int64_t dx64 = (int64_t)dx; // NOLINT
asm volatile (
"dup v0.4s, %w3 \n" // x
"dup v1.4s, %w4 \n" // dx
@@ -978,9 +978,9 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb,
#undef LOAD2_DATA32_LANE
// Read 16x2 average down and write 8x1.
-void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
+void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
asm volatile(
// change the stride to row 2 pointer
@@ -1008,9 +1008,9 @@ void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
// Read 8x2 upsample with filtering and write 16x1.
// Actually reads an extra pixel, so 9x2.
-void ScaleRowUp2_16_NEON(const uint16* src_ptr,
+void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width) {
asm volatile(
"add %1, %0, %1, lsl #1 \n" // ptr + stide * 2
diff --git a/chromium/third_party/libyuv/source/scale_win.cc b/chromium/third_party/libyuv/source/scale_win.cc
index b33881998aa..c5fc86f3e96 100644
--- a/chromium/third_party/libyuv/source/scale_win.cc
+++ b/chromium/third_party/libyuv/source/scale_win.cc
@@ -89,9 +89,9 @@ static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3,
65536 / 3, 65536 / 2, 0, 0};
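
These entries are fixed-point reciprocal weights: multiplying by 65536 / 3 (21845) and keeping only the high 16 bits of the 32-bit product divides by 3, e.g. (210 * 21845) >> 16 == 69, approximately 210 / 3. The mix of 1/3 and 1/2 weights lets the 3/8 box scalers further down average pixel groups of unequal size.
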
// Reads 32 pixels, throws half away and writes 16 pixels.
-__declspec(naked) void ScaleRowDown2_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -116,9 +116,9 @@ __declspec(naked) void ScaleRowDown2_SSSE3(const uint8* src_ptr,
}
// Blends 32x1 rectangle to 16x1.
-__declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -150,9 +150,9 @@ __declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr,
}
// Blends 32x2 rectangle to 16x1.
-__declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
@@ -195,9 +195,9 @@ __declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
#ifdef HAS_SCALEROWDOWN2_AVX2
// Reads 64 pixels, throws half away and writes 32 pixels.
-__declspec(naked) void ScaleRowDown2_AVX2(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -224,9 +224,9 @@ __declspec(naked) void ScaleRowDown2_AVX2(const uint8* src_ptr,
}
// Blends 64x1 rectangle to 32x1.
-__declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -262,9 +262,9 @@ __declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
// For rounding, average = (sum + 2) / 4
// becomes average((sum >> 1), 0)
// Blends 64x2 rectangle to 32x1.
-__declspec(naked) void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
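
The rounding identity stated above holds exactly, since pavg computes (x + y + 1) >> 1. A sketch that checks it for every possible sum of four 8-bit pixels:

  #include <assert.h>
  static inline int AvgRound(int x, int y) { return (x + y + 1) >> 1; }  // pavg
  static void CheckBoxRounding(void) {
    for (int sum = 0; sum <= 4 * 255; ++sum) {
      assert(AvgRound(sum >> 1, 0) == (sum + 2) / 4);
    }
  }
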
@@ -309,9 +309,9 @@ __declspec(naked) void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
#endif // HAS_SCALEROWDOWN2_AVX2
// Point samples 32 pixels to 8 pixels.
-__declspec(naked) void ScaleRowDown4_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -341,9 +341,9 @@ __declspec(naked) void ScaleRowDown4_SSSE3(const uint8* src_ptr,
}
// Blends 32x4 rectangle to 8x1.
-__declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
@@ -400,9 +400,9 @@ __declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
#ifdef HAS_SCALEROWDOWN4_AVX2
// Point samples 64 pixels to 16 pixels.
-__declspec(naked) void ScaleRowDown4_AVX2(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -435,9 +435,9 @@ __declspec(naked) void ScaleRowDown4_AVX2(const uint8* src_ptr,
}
// Blends 64x4 rectangle to 16x1.
-__declspec(naked) void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
@@ -499,9 +499,9 @@ __declspec(naked) void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
// Then shuffled to do the scaling.
-__declspec(naked) void ScaleRowDown34_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -547,9 +547,9 @@ __declspec(naked) void ScaleRowDown34_SSSE3(const uint8* src_ptr,
// xmm7 kRound34
// Note that movdqa+palign may be better than movdqu.
-__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
@@ -604,9 +604,9 @@ __declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
}
// Note that movdqa+palign may be better than movdqu.
-__declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
@@ -666,9 +666,9 @@ __declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
// 3/8 point sampler
// Scale 32 pixels to 12
-__declspec(naked) void ScaleRowDown38_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -698,9 +698,9 @@ __declspec(naked) void ScaleRowDown38_SSSE3(const uint8* src_ptr,
}
// Scale 16x3 pixels to 6x1 with interpolation
-__declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
@@ -763,9 +763,9 @@ __declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
}
// Scale 16x2 pixels to 6x1 with interpolation
-__declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
+__declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
- uint8* dst_ptr,
+ uint8_t* dst_ptr,
int dst_width) {
__asm {
push esi
@@ -808,8 +808,8 @@ __declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
}
// Reads 16 bytes and accumulates to 16 shorts at a time.
-__declspec(naked) void ScaleAddRow_SSE2(const uint8* src_ptr,
- uint16* dst_ptr,
+__declspec(naked) void ScaleAddRow_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
int src_width) {
__asm {
mov eax, [esp + 4] // src_ptr
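
ScaleAddRow widens each 8-bit pixel and accumulates it into a 16-bit sum row; the box scalers later divide the sums by the number of accumulated rows. The scalar form is one addition per pixel (a sketch mirroring the C fallback):

  #include <stdint.h>
  void ScaleAddRowRef(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
    for (int x = 0; x < src_width; ++x) {
      dst_ptr[x] = (uint16_t)(dst_ptr[x] + src_ptr[x]);  // caller keeps sums < 65536
    }
  }
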
@@ -839,8 +839,8 @@ __declspec(naked) void ScaleAddRow_SSE2(const uint8* src_ptr,
#ifdef HAS_SCALEADDROW_AVX2
// Reads 32 bytes and accumulates to 32 shorts at a time.
-__declspec(naked) void ScaleAddRow_AVX2(const uint8* src_ptr,
- uint16* dst_ptr,
+__declspec(naked) void ScaleAddRow_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
int src_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@@ -879,8 +879,8 @@ static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040,
0x4040, 0x4040, 0x4040, 0x4040};
// Bilinear column filtering. SSSE3 version.
-__declspec(naked) void ScaleFilterCols_SSSE3(uint8* dst_ptr,
- const uint8* src_ptr,
+__declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -965,8 +965,8 @@ __declspec(naked) void ScaleFilterCols_SSSE3(uint8* dst_ptr,
}
// Reads 16 pixels, duplicates them and writes 32 pixels.
-__declspec(naked) void ScaleColsUp2_SSE2(uint8* dst_ptr,
- const uint8* src_ptr,
+__declspec(naked) void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
int dst_width,
int x,
int dx) {
@@ -992,9 +992,9 @@ __declspec(naked) void ScaleColsUp2_SSE2(uint8* dst_ptr,
}
// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
-__declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
+__declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -1017,9 +1017,9 @@ __declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
}
// Blends 8x1 rectangle to 4x1.
-__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
+__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -1045,9 +1045,9 @@ __declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
}
// Blends 8x2 rectangle to 4x1.
-__declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
+__declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
__asm {
push esi
@@ -1079,10 +1079,10 @@ __declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
}
// Reads 4 pixels at a time.
-__declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
+__declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
__asm {
push ebx
@@ -1116,10 +1116,10 @@ __declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
}
// Blends four 2x2 to 4x1.
-__declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
+__declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_width) {
__asm {
push ebx
@@ -1164,8 +1164,8 @@ __declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
}
// Column scaling unfiltered. SSE2 version.
-__declspec(naked) void ScaleARGBCols_SSE2(uint8* dst_argb,
- const uint8* src_argb,
+__declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
@@ -1257,8 +1257,8 @@ static const uvec8 kShuffleFractions = {
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
};
-__declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
- const uint8* src_argb,
+__declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
@@ -1330,8 +1330,8 @@ __declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
}
// Reads 4 pixels, duplicates them and writes 8 pixels.
-__declspec(naked) void ScaleARGBColsUp2_SSE2(uint8* dst_argb,
- const uint8* src_argb,
+__declspec(naked) void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
int dst_width,
int x,
int dx) {
diff --git a/chromium/third_party/libyuv/source/video_common.cc b/chromium/third_party/libyuv/source/video_common.cc
index 3e9c6a29502..92384c050cd 100644
--- a/chromium/third_party/libyuv/source/video_common.cc
+++ b/chromium/third_party/libyuv/source/video_common.cc
@@ -15,14 +15,13 @@ namespace libyuv {
extern "C" {
#endif
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
-
struct FourCCAliasEntry {
- uint32 alias;
- uint32 canonical;
+ uint32_t alias;
+ uint32_t canonical;
};
-static const struct FourCCAliasEntry kFourCCAliases[] = {
+#define NUM_ALIASES 18
+static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = {
{FOURCC_IYUV, FOURCC_I420},
{FOURCC_YU12, FOURCC_I420},
{FOURCC_YU16, FOURCC_I422},
@@ -46,9 +45,9 @@ static const struct FourCCAliasEntry kFourCCAliases[] = {
// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
LIBYUV_API
-uint32 CanonicalFourCC(uint32 fourcc) {
+uint32_t CanonicalFourCC(uint32_t fourcc) {
int i;
- for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
+ for (i = 0; i < NUM_ALIASES; ++i) {
if (kFourCCAliases[i].alias == fourcc) {
return kFourCCAliases[i].canonical;
}
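
Note that NUM_ALIASES now has to be kept in sync with the initializer by hand; the removed ARRAY_SIZE macro computed it from sizeof. Usage stays a single table lookup, e.g.:

  uint32_t fourcc = CanonicalFourCC(FOURCC_YU12);  // folds the alias to FOURCC_I420
  // Codes with no alias entry are returned unchanged.
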
diff --git a/chromium/third_party/libyuv/unit_test/basictypes_test.cc b/chromium/third_party/libyuv/unit_test/basictypes_test.cc
index 89f7644d58e..9aaa2dcd989 100644
--- a/chromium/third_party/libyuv/unit_test/basictypes_test.cc
+++ b/chromium/third_party/libyuv/unit_test/basictypes_test.cc
@@ -13,25 +13,15 @@
namespace libyuv {
-TEST_F(LibYUVBaseTest, Endian) {
- uint16 v16 = 0x1234u;
- uint8 first_byte = *reinterpret_cast<uint8*>(&v16);
-#if defined(LIBYUV_LITTLE_ENDIAN)
- EXPECT_EQ(0x34u, first_byte);
-#else
- EXPECT_EQ(0x12u, first_byte);
-#endif
-}
-
TEST_F(LibYUVBaseTest, SizeOfTypes) {
- int8 i8 = -1;
- uint8 u8 = 1u;
- int16 i16 = -1;
- uint16 u16 = 1u;
- int32 i32 = -1;
- uint32 u32 = 1u;
- int64 i64 = -1;
- uint64 u64 = 1u;
+ int8_t i8 = -1;
+ uint8_t u8 = 1u;
+ int16_t i16 = -1;
+ uint16_t u16 = 1u;
+ int32_t i32 = -1;
+ uint32_t u32 = 1u;
+ int64_t i64 = -1;
+ uint64_t u64 = 1u;
EXPECT_EQ(1u, sizeof(i8));
EXPECT_EQ(1u, sizeof(u8));
EXPECT_EQ(2u, sizeof(i16));
@@ -50,11 +40,4 @@ TEST_F(LibYUVBaseTest, SizeOfTypes) {
EXPECT_LT(0u, u64);
}
-TEST_F(LibYUVBaseTest, SizeOfConstants) {
- EXPECT_EQ(8u, sizeof(INT64_C(0)));
- EXPECT_EQ(8u, sizeof(UINT64_C(0)));
- EXPECT_EQ(8u, sizeof(INT64_C(0x1234567887654321)));
- EXPECT_EQ(8u, sizeof(UINT64_C(0x8765432112345678)));
-}
-
} // namespace libyuv
diff --git a/chromium/third_party/libyuv/unit_test/color_test.cc b/chromium/third_party/libyuv/unit_test/color_test.cc
index 30b6411283f..4bb448d56fe 100644
--- a/chromium/third_party/libyuv/unit_test/color_test.cc
+++ b/chromium/third_party/libyuv/unit_test/color_test.cc
@@ -63,10 +63,10 @@ namespace libyuv {
\
/* The test is overall for color conversion matrix being reversible, so */ \
/* this initializes the pixel with 2x2 blocks to eliminate subsampling. */ \
- uint8* p = orig_y; \
+ uint8_t* p = orig_y; \
for (int y = 0; y < benchmark_height_ - HS1; y += HS) { \
for (int x = 0; x < benchmark_width_ - 1; x += 2) { \
- uint8 r = static_cast<uint8>(fastrand()); \
+ uint8_t r = static_cast<uint8_t>(fastrand()); \
p[0] = r; \
p[1] = r; \
p[HN] = r; \
@@ -74,7 +74,7 @@ namespace libyuv {
p += 2; \
} \
if (benchmark_width_ & 1) { \
- uint8 r = static_cast<uint8>(fastrand()); \
+ uint8_t r = static_cast<uint8_t>(fastrand()); \
p[0] = r; \
p[HN] = r; \
p += 1; \
@@ -83,13 +83,13 @@ namespace libyuv {
} \
if ((benchmark_height_ & 1) && HS == 2) { \
for (int x = 0; x < benchmark_width_ - 1; x += 2) { \
- uint8 r = static_cast<uint8>(fastrand()); \
+ uint8_t r = static_cast<uint8_t>(fastrand()); \
p[0] = r; \
p[1] = r; \
p += 2; \
} \
if (benchmark_width_ & 1) { \
- uint8 r = static_cast<uint8>(fastrand()); \
+ uint8_t r = static_cast<uint8_t>(fastrand()); \
p[0] = r; \
p += 1; \
} \
@@ -147,10 +147,10 @@ static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) {
const int kPixels = kWidth * kHeight;
const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2);
- SIMD_ALIGNED(uint8 orig_y[16]);
- SIMD_ALIGNED(uint8 orig_u[8]);
- SIMD_ALIGNED(uint8 orig_v[8]);
- SIMD_ALIGNED(uint8 orig_pixels[16 * 4]);
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_u[8]);
+ SIMD_ALIGNED(uint8_t orig_v[8]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
memset(orig_y, y, kPixels);
memset(orig_u, u, kHalfPixels);
memset(orig_v, v, kHalfPixels);
@@ -170,10 +170,10 @@ static void YUVJToRGB(int y, int u, int v, int* r, int* g, int* b) {
const int kPixels = kWidth * kHeight;
const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2);
- SIMD_ALIGNED(uint8 orig_y[16]);
- SIMD_ALIGNED(uint8 orig_u[8]);
- SIMD_ALIGNED(uint8 orig_v[8]);
- SIMD_ALIGNED(uint8 orig_pixels[16 * 4]);
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_u[8]);
+ SIMD_ALIGNED(uint8_t orig_v[8]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
memset(orig_y, y, kPixels);
memset(orig_u, u, kHalfPixels);
memset(orig_v, v, kHalfPixels);
@@ -192,8 +192,8 @@ static void YToRGB(int y, int* r, int* g, int* b) {
const int kHeight = 1;
const int kPixels = kWidth * kHeight;
- SIMD_ALIGNED(uint8 orig_y[16]);
- SIMD_ALIGNED(uint8 orig_pixels[16 * 4]);
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
memset(orig_y, y, kPixels);
/* YUV converted to ARGB. */
@@ -209,8 +209,8 @@ static void YJToRGB(int y, int* r, int* g, int* b) {
const int kHeight = 1;
const int kPixels = kWidth * kHeight;
- SIMD_ALIGNED(uint8 orig_y[16]);
- SIMD_ALIGNED(uint8 orig_pixels[16 * 4]);
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
memset(orig_y, y, kPixels);
/* YUV converted to ARGB. */
diff --git a/chromium/third_party/libyuv/unit_test/compare_test.cc b/chromium/third_party/libyuv/unit_test/compare_test.cc
index 1c6d988ef2c..136254e169b 100644
--- a/chromium/third_party/libyuv/unit_test/compare_test.cc
+++ b/chromium/third_party/libyuv/unit_test/compare_test.cc
@@ -22,8 +22,10 @@
namespace libyuv {
// hash seed of 5381 recommended.
-static uint32 ReferenceHashDjb2(const uint8* src, uint64 count, uint32 seed) {
- uint32 hash = seed;
+static uint32_t ReferenceHashDjb2(const uint8_t* src,
+ uint64_t count,
+ uint32_t seed) {
+ uint32_t hash = seed;
if (count > 0) {
do {
hash = hash * 33 + *src++;
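
This is the classic djb2 fold, hash = hash * 33 + byte, from the recommended 5381 seed. One step by hand:

  // Hash of the single byte 'a' (97): 5381 * 33 + 97 == 177670.
  uint32_t h = ReferenceHashDjb2((const uint8_t*)"a", 1, 5381);  // == 177670
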
@@ -41,8 +43,8 @@ TEST_F(LibYUVCompareTest, Djb2_Test) {
"The quick brown fox jumps over the lazy dog"
" and feels as if he were in the seventh heaven of typography"
" together with Hermann Zapf";
- uint32 foxhash = HashDjb2(reinterpret_cast<const uint8*>(fox), 131, 5381);
- const uint32 kExpectedFoxHash = 2611006483u;
+ uint32_t foxhash = HashDjb2(reinterpret_cast<const uint8_t*>(fox), 131, 5381);
+ const uint32_t kExpectedFoxHash = 2611006483u;
EXPECT_EQ(kExpectedFoxHash, foxhash);
for (int i = 0; i < kMaxTest; ++i) {
@@ -50,8 +52,8 @@ TEST_F(LibYUVCompareTest, Djb2_Test) {
src_b[i] = (fastrand() & 0xff);
}
// Compare different buffers. Expect hash is different.
- uint32 h1 = HashDjb2(src_a, kMaxTest, 5381);
- uint32 h2 = HashDjb2(src_b, kMaxTest, 5381);
+ uint32_t h1 = HashDjb2(src_a, kMaxTest, 5381);
+ uint32_t h2 = HashDjb2(src_b, kMaxTest, 5381);
EXPECT_NE(h1, h2);
// Make last half same. Expect hash is different.
@@ -124,8 +126,8 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Opt) {
for (int i = 0; i < kMaxTest; ++i) {
src_a[i] = i;
}
- uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381);
- uint32 h1;
+ uint32_t h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381);
+ uint32_t h1;
for (int i = 0; i < benchmark_iterations_; ++i) {
h1 = HashDjb2(src_a, kMaxTest, 5381);
}
@@ -139,8 +141,8 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) {
for (int i = 0; i < kMaxTest; ++i) {
src_a[i + 1] = i;
}
- uint32 h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381);
- uint32 h1;
+ uint32_t h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381);
+ uint32_t h1;
for (int i = 0; i < benchmark_iterations_; ++i) {
h1 = HashDjb2(src_a + 1, kMaxTest, 5381);
}
@@ -149,7 +151,7 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) {
}
TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) {
- uint32 fourcc;
+ uint32_t fourcc;
const int kMaxTest = benchmark_width_ * benchmark_height_ * 4;
align_buffer_page_end(src_a, kMaxTest);
for (int i = 0; i < kMaxTest; ++i) {
@@ -159,12 +161,12 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) {
src_a[0] = 0;
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
- EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc);
+ EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc);
src_a[0] = 255;
src_a[3] = 0;
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
- EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc);
+ EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc);
src_a[3] = 255;
for (int i = 0; i < benchmark_iterations_; ++i) {
@@ -177,7 +179,7 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) {
}
TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) {
- uint32 fourcc;
+ uint32_t fourcc;
const int kMaxTest = benchmark_width_ * benchmark_height_ * 4 + 1;
align_buffer_page_end(src_a, kMaxTest);
for (int i = 1; i < kMaxTest; ++i) {
@@ -187,12 +189,12 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) {
src_a[0 + 1] = 0;
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
- EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc);
+ EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc);
src_a[0 + 1] = 255;
src_a[3 + 1] = 0;
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
- EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc);
+ EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc);
src_a[3 + 1] = 255;
for (int i = 0; i < benchmark_iterations_; ++i) {
@@ -214,14 +216,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
// Test known value
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
- uint32 h1 = HammingDistance_C(src_a, src_b, 16);
+ uint32_t h1 = HammingDistance_C(src_a, src_b, 16);
EXPECT_EQ(16u, h1);
// Test C vs OPT on random buffer
MemRandomize(src_a, kMaxWidth);
MemRandomize(src_b, kMaxWidth);
- uint32 h0 = HammingDistance_C(src_a, src_b, kMaxWidth);
+ uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth);
int count =
benchmark_iterations_ *
@@ -273,14 +275,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) {
// Test known value
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
- uint32 h1 = HammingDistance_C(src_a, src_b, 16);
+ uint32_t h1 = HammingDistance_C(src_a, src_b, 16);
EXPECT_EQ(16u, h1);
// Test C vs OPT on random buffer
MemRandomize(src_a, kMaxWidth);
MemRandomize(src_b, kMaxWidth);
- uint32 h0 = HammingDistance_C(src_a, src_b, kMaxWidth);
+ uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth);
int count =
benchmark_iterations_ *
@@ -304,14 +306,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) {
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
- uint64 h1 = ComputeHammingDistance(src_a, src_b, 16);
+ uint64_t h1 = ComputeHammingDistance(src_a, src_b, 16);
EXPECT_EQ(16u, h1);
// Test C vs OPT on random buffer
MemRandomize(src_a, kMaxWidth);
MemRandomize(src_b, kMaxWidth);
- uint32 h0 = HammingDistance_C(src_a, src_b, kMaxWidth);
+ uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth);
int count =
benchmark_iterations_ *
@@ -337,14 +339,14 @@ static const int kMaxOptCount = (1 << (32 - 3)) - 64; // 536870848
#endif
TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
- uint32 h1 = 0;
+ uint32_t h1 = 0;
const int kMaxWidth = (benchmark_width_ * benchmark_height_ + 31) & ~31;
align_buffer_page_end(src_a, kMaxWidth);
align_buffer_page_end(src_b, kMaxWidth);
memset(src_a, 255u, kMaxWidth);
memset(src_b, 0u, kMaxWidth);
- uint64 h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth);
+ uint64_t h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth);
EXPECT_EQ(kMaxWidth * 8ULL, h0);
for (int i = 0; i < benchmark_iterations_; ++i) {
@@ -385,7 +387,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
if (kMaxWidth <= kMaxOptCount) {
EXPECT_EQ(kMaxWidth * 8U, h1);
} else {
- if (kMaxWidth * 8ULL != static_cast<uint64>(h1)) {
+ if (kMaxWidth * 8ULL != static_cast<uint64_t>(h1)) {
printf(
"warning - HammingDistance_Opt %u does not match %llu "
"but length of %u is longer than guaranteed.\n",
@@ -408,7 +410,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance) {
memset(src_a, 255u, benchmark_width_ * benchmark_height_);
memset(src_b, 0, benchmark_width_ * benchmark_height_);
- uint64 h1 = 0;
+ uint64_t h1 = 0;
for (int i = 0; i < benchmark_iterations_; ++i) {
h1 = ComputeHammingDistance(src_a, src_b,
benchmark_width_ * benchmark_height_);
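
The Hamming distance here is the popcount of the bytewise XOR, so the all-255 versus all-0 buffers differ in exactly 8 bits per byte, matching the * 8 expectations in these tests. A scalar sketch:

  #include <stdint.h>
  uint32_t HammingRef(const uint8_t* a, const uint8_t* b, int count) {
    uint32_t diff = 0;
    for (int i = 0; i < count; ++i) {
      uint8_t x = (uint8_t)(a[i] ^ b[i]);
      while (x) {  // Kernighan: each pass clears the lowest set bit
        x &= (uint8_t)(x - 1);
        ++diff;
      }
    }
    return diff;
  }
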
@@ -428,7 +430,7 @@ TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) {
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
- uint64 h1 = ComputeSumSquareError(src_a, src_b, 16);
+ uint64_t h1 = ComputeSumSquareError(src_a, src_b, 16);
EXPECT_EQ(790u, h1);
for (int i = 0; i < kMaxWidth; ++i) {
@@ -458,7 +460,7 @@ TEST_F(LibYUVCompareTest, SumSquareError) {
memset(src_a, 0, kMaxWidth);
memset(src_b, 0, kMaxWidth);
- uint64 err;
+ uint64_t err;
err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
EXPECT_EQ(0u, err);
@@ -480,10 +482,10 @@ TEST_F(LibYUVCompareTest, SumSquareError) {
}
MaskCpuFlags(disable_cpu_flags_);
- uint64 c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
+ uint64_t c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
MaskCpuFlags(benchmark_cpu_info_);
- uint64 opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
+ uint64_t opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
EXPECT_EQ(c_err, opt_err);
@@ -502,9 +504,10 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Opt) {
MaskCpuFlags(benchmark_cpu_info_);
double opt_time = get_time();
- for (int i = 0; i < benchmark_iterations_; ++i)
+ for (int i = 0; i < benchmark_iterations_; ++i) {
CalcFramePsnr(src_a, benchmark_width_, src_b, benchmark_width_,
benchmark_width_, benchmark_height_);
+ }
opt_time = (get_time() - opt_time) / benchmark_iterations_;
printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6);
@@ -526,9 +529,10 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Unaligned) {
MaskCpuFlags(benchmark_cpu_info_);
double opt_time = get_time();
- for (int i = 0; i < benchmark_iterations_; ++i)
+ for (int i = 0; i < benchmark_iterations_; ++i) {
CalcFramePsnr(src_a + 1, benchmark_width_, src_b, benchmark_width_,
benchmark_width_, benchmark_height_);
+ }
opt_time = (get_time() - opt_time) / benchmark_iterations_;
printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6);
@@ -627,9 +631,10 @@ TEST_F(LibYUVCompareTest, DISABLED_BenchmarkSsim_Opt) {
MaskCpuFlags(benchmark_cpu_info_);
double opt_time = get_time();
- for (int i = 0; i < benchmark_iterations_; ++i)
+ for (int i = 0; i < benchmark_iterations_; ++i) {
CalcFrameSsim(src_a, benchmark_width_, src_b, benchmark_width_,
benchmark_width_, benchmark_height_);
+ }
opt_time = (get_time() - opt_time) / benchmark_iterations_;
printf("BenchmarkSsim_Opt - %8.2f us opt\n", opt_time * 1e6);
diff --git a/chromium/third_party/libyuv/unit_test/convert_test.cc b/chromium/third_party/libyuv/unit_test/convert_test.cc
index 7d196a1d8e0..750bd871992 100644
--- a/chromium/third_party/libyuv/unit_test/convert_test.cc
+++ b/chromium/third_party/libyuv/unit_test/convert_test.cc
@@ -41,6 +41,7 @@ namespace libyuv {
// Alias to copy pixels as is
#define AR30ToAR30 ARGBCopy
+#define ABGRToABGR ARGBCopy
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
@@ -136,20 +137,20 @@ namespace libyuv {
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _Opt, +, 0)
-TESTPLANARTOP(I420, uint8, 1, 2, 2, I420, uint8, 1, 2, 2)
-TESTPLANARTOP(I422, uint8, 1, 2, 1, I420, uint8, 1, 2, 2)
-TESTPLANARTOP(I444, uint8, 1, 1, 1, I420, uint8, 1, 2, 2)
-TESTPLANARTOP(I420, uint8, 1, 2, 2, I422, uint8, 1, 2, 1)
-TESTPLANARTOP(I420, uint8, 1, 2, 2, I444, uint8, 1, 1, 1)
-TESTPLANARTOP(I420, uint8, 1, 2, 2, I420Mirror, uint8, 1, 2, 2)
-TESTPLANARTOP(I422, uint8, 1, 2, 1, I422, uint8, 1, 2, 1)
-TESTPLANARTOP(I444, uint8, 1, 1, 1, I444, uint8, 1, 1, 1)
-TESTPLANARTOP(I010, uint16, 2, 2, 2, I010, uint16, 2, 2, 2)
-TESTPLANARTOP(I010, uint16, 2, 2, 2, I420, uint8, 1, 2, 2)
-TESTPLANARTOP(I420, uint8, 1, 2, 2, I010, uint16, 2, 2, 2)
-TESTPLANARTOP(H010, uint16, 2, 2, 2, H010, uint16, 2, 2, 2)
-TESTPLANARTOP(H010, uint16, 2, 2, 2, H420, uint8, 1, 2, 2)
-TESTPLANARTOP(H420, uint8, 1, 2, 2, H010, uint16, 2, 2, 2)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2)
// Test Android 420 to I420
#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
@@ -173,8 +174,8 @@ TESTPLANARTOP(H420, uint8, 1, 2, 2, H010, uint16, 2, 2, 2)
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- uint8* src_u = src_uv + OFF_U; \
- uint8* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
+ uint8_t* src_u = src_uv + OFF_U; \
+ uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
@@ -594,6 +595,7 @@ TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4)
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1, 0, ARGB, 4)
+TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1, 0, ARGB, 4)
// TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1, 0, ABGR, 4)
#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
@@ -1064,6 +1066,7 @@ TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
+TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
@@ -1080,6 +1083,7 @@ TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
@@ -1238,8 +1242,8 @@ TESTSYM(BGRAToARGB, 4, 4, 1)
TESTSYM(ABGRToARGB, 4, 4, 1)
TEST_F(LibYUVConvertTest, Test565) {
- SIMD_ALIGNED(uint8 orig_pixels[256][4]);
- SIMD_ALIGNED(uint8 pixels565[256][2]);
+ SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
+ SIMD_ALIGNED(uint8_t pixels565[256][2]);
for (int i = 0; i < 256; ++i) {
for (int j = 0; j < 4; ++j) {
@@ -1247,7 +1251,7 @@ TEST_F(LibYUVConvertTest, Test565) {
}
}
ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
- uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
+ uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
EXPECT_EQ(610919429u, checksum);
}
@@ -1442,7 +1446,7 @@ TEST_F(LibYUVConvertTest, NV12Crop) {
const int sample_size =
kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
align_buffer_page_end(src_y, sample_size);
- uint8* src_uv = src_y + kWidth * kHeight;
+ uint8_t* src_uv = src_y + kWidth * kHeight;
align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
@@ -1510,13 +1514,13 @@ TEST_F(LibYUVConvertTest, NV12Crop) {
}
TEST_F(LibYUVConvertTest, TestYToARGB) {
- uint8 y[32];
- uint8 expectedg[32];
+ uint8_t y[32];
+ uint8_t expectedg[32];
for (int i = 0; i < 32; ++i) {
y[i] = i * 5 + 17;
expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f);
}
- uint8 argb[32 * 4];
+ uint8_t argb[32 * 4];
YToARGB(y, 0, argb, 0, 32, 1);
for (int i = 0; i < 32; ++i) {
@@ -1528,7 +1532,7 @@ TEST_F(LibYUVConvertTest, TestYToARGB) {
}
}
-static const uint8 kNoDither4x4[16] = {
+static const uint8_t kNoDither4x4[16] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
@@ -1555,7 +1559,7 @@ TEST_F(LibYUVConvertTest, TestNoDither) {
}
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
-static const uint8 kDither565_4x4[16] = {
+static const uint8_t kDither565_4x4[16] = {
0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
};
@@ -1943,8 +1947,9 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
// Caveat: Destination needs to be 4 bytes
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
-
-// TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ABGR, 4)
+TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
+TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
+TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4)
TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
// 2x2 frames
@@ -2015,79 +2020,113 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
}
#endif // HAS_ARGBTOAR30ROW_AVX2
-#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF, \
- FMT_C, BPP_C) \
- TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
- const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
- const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
- const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
- const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
- const int kBpc = 2; \
- align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
- align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
- align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
- align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
- align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
- for (int i = 0; i < kWidth * kHeight; ++i) { \
- reinterpret_cast<uint16*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \
- } \
- for (int i = 0; i < kSizeUV; ++i) { \
- reinterpret_cast<uint16*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \
- reinterpret_cast<uint16*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \
- } \
- memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
- memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16*>(src_y + SOFF), kWidth, \
- reinterpret_cast<uint16*>(src_u + SOFF), kStrideUV, \
- reinterpret_cast<uint16*>(src_v + SOFF), kStrideUV, \
- dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_PLANAR##To##FMT_B( \
- reinterpret_cast<uint16*>(src_y + SOFF), kWidth, \
- reinterpret_cast<uint16*>(src_u + SOFF), kStrideUV, \
- reinterpret_cast<uint16*>(src_v + SOFF), kStrideUV, \
- dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
- } \
- int max_diff = 0; \
- for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
- int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) - \
- static_cast<int>(dst_argb_opt[i + DOFF])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
- EXPECT_LE(max_diff, DIFF); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_u); \
- free_aligned_buffer_page_end(src_v); \
- free_aligned_buffer_page_end(dst_argb_c); \
- free_aligned_buffer_page_end(dst_argb_opt); \
+#ifdef HAS_ABGRTOAR30ROW_AVX2
+TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
+ // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels.
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
+ align_buffer_page_end(src, kPixels * 4);
+ align_buffer_page_end(dst_opt, kPixels * 4);
+ align_buffer_page_end(dst_c, kPixels * 4);
+ MemRandomize(src, kPixels * 4);
+ memset(dst_opt, 0, kPixels * 4);
+ memset(dst_c, 1, kPixels * 4);
+
+ ABGRToAR30Row_C(src, dst_c, kPixels);
+
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+ int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ if (has_avx2) {
+ ABGRToAR30Row_AVX2(src, dst_opt, kPixels);
+ } else if (has_ssse3) {
+ ABGRToAR30Row_SSSE3(src, dst_opt, kPixels);
+ } else {
+ ABGRToAR30Row_C(src, dst_opt, kPixels);
+ }
+ }
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_opt[i], dst_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src);
+ free_aligned_buffer_page_end(dst_opt);
+ free_aligned_buffer_page_end(dst_c);
+}
+#endif // HAS_ABGRTOAR30ROW_AVX2
+
+// TODO(fbarchard): Fix clamping issue affected by U channel.
+#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+ ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF) \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
+ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
+ const int kBpc = 2; \
+ align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
+ align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
+ align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
+ align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
+ align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \
+ } \
+ for (int i = 0; i < kSizeUV; ++i) { \
+ reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \
+ reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \
+ } \
+ memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
+ memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_PLANAR##To##FMT_B( \
+ reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
+ dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_PLANAR##To##FMT_B( \
+ reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
+ dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
+ } \
+ int max_diff = 0; \
+ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
+ int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) - \
+ static_cast<int>(dst_argb_opt[i + DOFF])); \
+ if (abs_diff > max_diff) { \
+ max_diff = abs_diff; \
+ } \
+ } \
+ EXPECT_LE(max_diff, DIFF); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_u); \
+ free_aligned_buffer_page_end(src_v); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, DIFF, FMT_C, BPP_C) \
+ YALIGN, DIFF) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0, FMT_C, \
- BPP_C) \
+ YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 1, FMT_C, \
- BPP_C) \
+ YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 1) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0, FMT_C, \
- BPP_C) \
+ YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0, FMT_C, \
- BPP_C)
+ YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0)
-TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2, AR30, 4)
-TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
-TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4)
-TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
-TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4)
+TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2)
+TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2)
+TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
+TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
+TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2)
+TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2)
+TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
+TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 2)
static int Clamp(int y) {
if (y < 0) {
@@ -2099,13 +2138,30 @@ static int Clamp(int y) {
return y;
}
+static int Clamp10(int y) {
+ if (y < 0) {
+ y = 0;
+ }
+ if (y > 1023) {
+ y = 1023;
+ }
+ return y;
+}
+
+// Test 8 bit YUV to 8 bit RGB
TEST_F(LibYUVConvertTest, TestH420ToARGB) {
const int kSize = 256;
+ int histogram_b[256];
+ int histogram_g[256];
+ int histogram_r[256];
+ memset(histogram_b, 0, sizeof(histogram_b));
+ memset(histogram_g, 0, sizeof(histogram_g));
+ memset(histogram_r, 0, sizeof(histogram_r));
align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2);
align_buffer_page_end(argb_pixels, kSize * 4);
- uint8* orig_y = orig_yuv;
- uint8* orig_u = orig_y + kSize;
- uint8* orig_v = orig_u + kSize / 2;
+ uint8_t* orig_y = orig_yuv;
+ uint8_t* orig_u = orig_y + kSize;
+ uint8_t* orig_v = orig_u + kSize / 2;
// Test grey scale
for (int i = 0; i < kSize; ++i) {
@@ -2119,23 +2175,54 @@ TEST_F(LibYUVConvertTest, TestH420ToARGB) {
H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
for (int i = 0; i < kSize; ++i) {
+ int b = argb_pixels[i * 4 + 0];
+ int g = argb_pixels[i * 4 + 1];
+ int r = argb_pixels[i * 4 + 2];
+ int a = argb_pixels[i * 4 + 3];
+ ++histogram_b[b];
+ ++histogram_g[g];
+ ++histogram_r[r];
int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f));
- EXPECT_NEAR(argb_pixels[i * 4 + 0], expected_y, 1);
- EXPECT_NEAR(argb_pixels[i * 4 + 1], expected_y, 1);
- EXPECT_NEAR(argb_pixels[i * 4 + 2], expected_y, 1);
- EXPECT_EQ(argb_pixels[i * 4 + 3], 255);
+ EXPECT_NEAR(b, expected_y, 1);
+ EXPECT_NEAR(g, expected_y, 1);
+ EXPECT_NEAR(r, expected_y, 1);
+ EXPECT_EQ(a, 255);
}
+
+ int count_b = 0;
+ int count_g = 0;
+ int count_r = 0;
+ for (int i = 0; i < kSize; ++i) {
+ if (histogram_b[i]) {
+ ++count_b;
+ }
+ if (histogram_g[i]) {
+ ++count_g;
+ }
+ if (histogram_r[i]) {
+ ++count_r;
+ }
+ }
+ printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
free_aligned_buffer_page_end(orig_yuv);
free_aligned_buffer_page_end(argb_pixels);
}
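
Both expected_y formulas encode the limited-range luma expansion: 1.164 is approximately 255 / 219, mapping Y in [16, 235] up to [0, 255], so the 8-bit test subtracts 16. The 10-bit test below subtracts 64 and divides by 4 because 10-bit limited-range Y spans [64, 940] and the output is still 8 bits: (940 - 64) * 1.164 / 4 is approximately 255.
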
+// Test 10 bit YUV to 8 bit RGB
TEST_F(LibYUVConvertTest, TestH010ToARGB) {
const int kSize = 1024;
+ int histogram_b[1024];
+ int histogram_g[1024];
+ int histogram_r[1024];
+ memset(histogram_b, 0, sizeof(histogram_b));
+ memset(histogram_g, 0, sizeof(histogram_g));
+ memset(histogram_r, 0, sizeof(histogram_r));
align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
align_buffer_page_end(argb_pixels, kSize * 4);
- uint16* orig_y = reinterpret_cast<uint16*>(orig_yuv);
- uint16* orig_u = orig_y + kSize;
- uint16* orig_v = orig_u + kSize / 2;
+ uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
+ uint16_t* orig_u = orig_y + kSize;
+ uint16_t* orig_v = orig_u + kSize / 2;
// Test grey scale
for (int i = 0; i < kSize; ++i) {
@@ -2149,14 +2236,226 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) {
H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1);
for (int i = 0; i < kSize; ++i) {
+ int b = argb_pixels[i * 4 + 0];
+ int g = argb_pixels[i * 4 + 1];
+ int r = argb_pixels[i * 4 + 2];
+ int a = argb_pixels[i * 4 + 3];
+ ++histogram_b[b];
+ ++histogram_g[g];
+ ++histogram_r[r];
int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4));
- EXPECT_NEAR(argb_pixels[i * 4 + 0], expected_y, 1);
- EXPECT_NEAR(argb_pixels[i * 4 + 1], expected_y, 1);
- EXPECT_NEAR(argb_pixels[i * 4 + 2], expected_y, 1);
- EXPECT_EQ(argb_pixels[i * 4 + 3], 255);
+ EXPECT_NEAR(b, expected_y, 1);
+ EXPECT_NEAR(g, expected_y, 1);
+ EXPECT_NEAR(r, expected_y, 1);
+ EXPECT_EQ(a, 255);
+ }
+
+ int count_b = 0;
+ int count_g = 0;
+ int count_r = 0;
+ for (int i = 0; i < kSize; ++i) {
+ if (histogram_b[i]) {
+ ++count_b;
+ }
+ if (histogram_g[i]) {
+ ++count_g;
+ }
+ if (histogram_r[i]) {
+ ++count_r;
+ }
}
+ printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
free_aligned_buffer_page_end(orig_yuv);
free_aligned_buffer_page_end(argb_pixels);
}
+// Test 10 bit YUV to 10 bit RGB
+// Caveat: Results are compared with EXPECT_NEAR due to float rounding in the
+// expected values.
+TEST_F(LibYUVConvertTest, TestH010ToAR30) {
+ const int kSize = 1024;
+ int histogram_b[1024];
+ int histogram_g[1024];
+ int histogram_r[1024];
+ memset(histogram_b, 0, sizeof(histogram_b));
+ memset(histogram_g, 0, sizeof(histogram_g));
+ memset(histogram_r, 0, sizeof(histogram_r));
+
+ align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
+ align_buffer_page_end(ar30_pixels, kSize * 4);
+ uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
+ uint16_t* orig_u = orig_y + kSize;
+ uint16_t* orig_v = orig_u + kSize / 2;
+
+ // Test grey scale
+ for (int i = 0; i < kSize; ++i) {
+ orig_y[i] = i;
+ }
+ for (int i = 0; i < kSize / 2; ++i) {
+ orig_u[i] = 512; // 512 is 0.
+ orig_v[i] = 512;
+ }
+
+ H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1);
+
+ for (int i = 0; i < kSize; ++i) {
+ int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023;
+ int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023;
+ int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023;
+ int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3;
+ ++histogram_b[b10];
+ ++histogram_g[g10];
+ ++histogram_r[r10];
+ int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
+ EXPECT_NEAR(b10, expected_y, 4);
+ EXPECT_NEAR(g10, expected_y, 4);
+ EXPECT_NEAR(r10, expected_y, 4);
+ EXPECT_EQ(a2, 3);
+ }
+
+ int count_b = 0;
+ int count_g = 0;
+ int count_r = 0;
+ for (int i = 0; i < kSize; ++i) {
+ if (histogram_b[i]) {
+ ++count_b;
+ }
+ if (histogram_g[i]) {
+ ++count_g;
+ }
+ if (histogram_r[i]) {
+ ++count_r;
+ }
+ }
+ printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+ free_aligned_buffer_page_end(orig_yuv);
+ free_aligned_buffer_page_end(ar30_pixels);
+}
+
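
The AR30 tests unpack 2.10.10.10 words, and AB30 is the same layout with R and B swapped. The inverse packing makes the bit assignment explicit (hypothetical helper):

  #include <stdint.h>
  // AR30, little-endian 32-bit word: bits 0-9 B, 10-19 G, 20-29 R, 30-31 A.
  static inline uint32_t PackAR30(int a2, int r10, int g10, int b10) {
    return ((uint32_t)a2 << 30) | ((uint32_t)r10 << 20) |
           ((uint32_t)g10 << 10) | (uint32_t)b10;
  }
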
+// Test 10 bit YUV to 10 bit RGB
+// Caveat: Results are compared with EXPECT_NEAR due to float rounding in the
+// expected values.
+TEST_F(LibYUVConvertTest, TestH010ToAB30) {
+ const int kSize = 1024;
+ int histogram_b[1024];
+ int histogram_g[1024];
+ int histogram_r[1024];
+ memset(histogram_b, 0, sizeof(histogram_b));
+ memset(histogram_g, 0, sizeof(histogram_g));
+ memset(histogram_r, 0, sizeof(histogram_r));
+
+ align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2);
+ align_buffer_page_end(ab30_pixels, kSize * 4);
+ uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv);
+ uint16_t* orig_u = orig_y + kSize;
+ uint16_t* orig_v = orig_u + kSize / 2;
+
+ // Test grey scale
+ for (int i = 0; i < kSize; ++i) {
+ orig_y[i] = i;
+ }
+ for (int i = 0; i < kSize / 2; ++i) {
+ orig_u[i] = 512; // 512 is 0.
+ orig_v[i] = 512;
+ }
+
+ H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1);
+
+ for (int i = 0; i < kSize; ++i) {
+ int r10 = reinterpret_cast<uint32_t*>(ab30_pixels)[i] & 1023;
+ int g10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 10) & 1023;
+ int b10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 20) & 1023;
+ int a2 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 30) & 3;
+ ++histogram_b[b10];
+ ++histogram_g[g10];
+ ++histogram_r[r10];
+ int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
+ EXPECT_NEAR(b10, expected_y, 4);
+ EXPECT_NEAR(g10, expected_y, 4);
+ EXPECT_NEAR(r10, expected_y, 4);
+ EXPECT_EQ(a2, 3);
+ }
+
+ int count_b = 0;
+ int count_g = 0;
+ int count_r = 0;
+ for (int i = 0; i < kSize; ++i) {
+ if (histogram_b[i]) {
+ ++count_b;
+ }
+ if (histogram_g[i]) {
+ ++count_g;
+ }
+ if (histogram_r[i]) {
+ ++count_r;
+ }
+ }
+ printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+ free_aligned_buffer_page_end(orig_yuv);
+ free_aligned_buffer_page_end(ab30_pixels);
+}
+
+// Test 8 bit YUV to 10 bit RGB
+TEST_F(LibYUVConvertTest, TestH420ToAR30) {
+ const int kSize = 256;
+ const int kHistSize = 1024;
+ int histogram_b[kHistSize];
+ int histogram_g[kHistSize];
+ int histogram_r[kHistSize];
+ memset(histogram_b, 0, sizeof(histogram_b));
+ memset(histogram_g, 0, sizeof(histogram_g));
+ memset(histogram_r, 0, sizeof(histogram_r));
+ align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2);
+ align_buffer_page_end(ar30_pixels, kSize * 4);
+ uint8_t* orig_y = orig_yuv;
+ uint8_t* orig_u = orig_y + kSize;
+ uint8_t* orig_v = orig_u + kSize / 2;
+
+ // Test grey scale
+ for (int i = 0; i < kSize; ++i) {
+ orig_y[i] = i;
+ }
+ for (int i = 0; i < kSize / 2; ++i) {
+ orig_u[i] = 128; // 128 is 0.
+ orig_v[i] = 128;
+ }
+
+ H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1);
+
+ for (int i = 0; i < kSize; ++i) {
+ int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023;
+ int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023;
+ int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023;
+ int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3;
+ ++histogram_b[b10];
+ ++histogram_g[g10];
+ ++histogram_r[r10];
+ int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f));
+ EXPECT_NEAR(b10, expected_y, 4);
+ EXPECT_NEAR(g10, expected_y, 4);
+ EXPECT_NEAR(r10, expected_y, 4);
+ EXPECT_EQ(a2, 3);
+ }
+
+ int count_b = 0;
+ int count_g = 0;
+ int count_r = 0;
+ for (int i = 0; i < kHistSize; ++i) {
+ if (histogram_b[i]) {
+ ++count_b;
+ }
+ if (histogram_g[i]) {
+ ++count_g;
+ }
+ if (histogram_r[i]) {
+ ++count_r;
+ }
+ }
+ printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r);
+
+ free_aligned_buffer_page_end(orig_yuv);
+ free_aligned_buffer_page_end(ar30_pixels);
+}
+
} // namespace libyuv
diff --git a/chromium/third_party/libyuv/unit_test/math_test.cc b/chromium/third_party/libyuv/unit_test/math_test.cc
index 2b4b57b1cea..0abbad51321 100644
--- a/chromium/third_party/libyuv/unit_test/math_test.cc
+++ b/chromium/third_party/libyuv/unit_test/math_test.cc
@@ -65,8 +65,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv) {
}
EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1));
- MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num));
- MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div));
+ MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num));
+ MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div));
for (int j = 0; j < 1280; ++j) {
if (div[j] == 0) {
div[j] = 1280;
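
FixedDiv returns num / div in 16.16 fixed point, which is what the EXPECT_EQ(123 * 65536, FixedDiv(123, 1)) above pins down. For example:

  int half = libyuv::FixedDiv(1, 2);  // 65536 / 2 == 32768, i.e. 0.5 in 16.16
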
@@ -90,8 +90,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv_Opt) {
int result_opt[1280];
int result_c[1280];
- MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num));
- MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div));
+ MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num));
+ MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div));
for (int j = 0; j < 1280; ++j) {
num[j] &= 4095; // Make numerator smaller.
div[j] &= 4095; // Make divisor smaller.
@@ -124,8 +124,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) {
int result_opt[1280];
int result_c[1280];
- MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num));
- MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div));
+ MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num));
+ MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div));
for (int j = 0; j < 1280; ++j) {
num[j] &= 4095; // Make numerator smaller.
div[j] &= 4095; // Make divisor smaller.
diff --git a/chromium/third_party/libyuv/unit_test/planar_test.cc b/chromium/third_party/libyuv/unit_test/planar_test.cc
index a499688feed..9f95941ce03 100644
--- a/chromium/third_party/libyuv/unit_test/planar_test.cc
+++ b/chromium/third_party/libyuv/unit_test/planar_test.cc
@@ -252,8 +252,8 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
- SIMD_ALIGNED(uint8 orig_pixels[16][16][4]);
- SIMD_ALIGNED(int32 added_pixels[16][16][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]);
+ SIMD_ALIGNED(int32_t added_pixels[16][16][4]);
for (int y = 0; y < 16; ++y) {
for (int x = 0; x < 16; ++x) {
@@ -278,7 +278,7 @@ TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
}
TEST_F(LibYUVPlanarTest, TestARGBGray) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
// Test blue
@@ -349,8 +349,8 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) {
}
TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8 gray_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
// Test blue
@@ -421,7 +421,7 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
}
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
// Test blue
@@ -493,12 +493,12 @@ TEST_F(LibYUVPlanarTest, TestARGBSepia) {
}
TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
- SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
+ SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
// Matrix for Sepia.
- SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
+ SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
24 / 2, 98 / 2, 50 / 2, 0, 0, 0, 0, 64, // Copy alpha.
};
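
The matrix is applied per pixel as a dot product with coefficients in 1/64 units, which is why the 64 in the last row copies alpha unchanged. A sketch of the assumed per-channel step (the >> 6 divides by 64):

  #include <stdint.h>
  static inline uint8_t ApplyMatrixRow(const uint8_t bgra[4], const int8_t m[4]) {
    int sum = (bgra[0] * m[0] + bgra[1] * m[1] + bgra[2] * m[2] +
               bgra[3] * m[3]) >> 6;
    return (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));  // clamp to 8 bits
  }
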
@@ -569,10 +569,10 @@ TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
}
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
// Matrix for Sepia.
- SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
+ SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
17, 68, 35, 0, 22, 88, 45, 0,
24, 98, 50, 0, 0, 0, 0, 0, // Unused but makes matrix 16 bytes.
};
@@ -629,11 +629,11 @@ TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
}
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
  // Color table for ARGB (entries beyond the first 16 are zero-initialized).
- static const uint8 kARGBTable[256 * 4] = {
+ static const uint8_t kARGBTable[256 * 4] = {
1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
};
@@ -685,11 +685,11 @@ TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
// Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
  // Color table for ARGB (entries beyond the first 16 are zero-initialized).
- static const uint8 kARGBTable[256 * 4] = {
+ static const uint8_t kARGBTable[256 * 4] = {
1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
};
@@ -740,7 +740,7 @@ TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
}
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
@@ -764,8 +764,8 @@ TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
}
TEST_F(LibYUVPlanarTest, TestARGBMirror) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8 dst_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t dst_pixels[1280][4]);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
@@ -787,8 +787,8 @@ TEST_F(LibYUVPlanarTest, TestARGBMirror) {
}
TEST_F(LibYUVPlanarTest, TestShade) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8 shade_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t shade_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
orig_pixels[0][0] = 10u;
@@ -845,9 +845,9 @@ TEST_F(LibYUVPlanarTest, TestShade) {
}
TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
- SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
- SIMD_ALIGNED(uint8 orig_pixels_1[1280][4]);
- SIMD_ALIGNED(uint8 interpolate_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
+ SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
@@ -926,9 +926,9 @@ TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
}
TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
- SIMD_ALIGNED(uint8 orig_pixels_0[1280]);
- SIMD_ALIGNED(uint8 orig_pixels_1[1280]);
- SIMD_ALIGNED(uint8 interpolate_pixels[1280]);
+ SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
+ SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
+ SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
@@ -1192,7 +1192,6 @@ static void TestBlendPlane(int width,
free_aligned_buffer_page_end(src_argb_alpha);
free_aligned_buffer_page_end(dst_argb_c);
free_aligned_buffer_page_end(dst_argb_opt);
- return;
}
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
@@ -1286,7 +1285,6 @@ static void TestI420Blend(int width,
free_aligned_buffer_page_end(dst_y_opt);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_opt);
- return;
}
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
@@ -1309,8 +1307,8 @@ TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
}
TEST_F(LibYUVPlanarTest, TestAffine) {
- SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
- SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
+ SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);
for (int i = 0; i < 1280; ++i) {
for (int j = 0; j < 4; ++j) {
@@ -1327,7 +1325,7 @@ TEST_F(LibYUVPlanarTest, TestAffine) {
EXPECT_EQ(191u, interpolate_pixels_C[255][3]);
#if defined(HAS_ARGBAFFINEROW_SSE2)
- SIMD_ALIGNED(uint8 interpolate_pixels_Opt[1280][4]);
+ SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
uv_step, 1280);
EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));
@@ -1367,7 +1365,7 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) {
// Fill destination buffers with random data.
for (i = 0; i < y_plane_size; ++i) {
- uint8 random_number = fastrand() & 0x7f;
+ uint8_t random_number = fastrand() & 0x7f;
dst_c[i] = random_number;
dst_opt[i] = dst_c[i];
}
@@ -1390,8 +1388,9 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) {
}
for (i = 0; i < y_plane_size; ++i) {
- if (dst_c[i] != dst_opt[i])
+ if (dst_c[i] != dst_opt[i]) {
++err;
+ }
}
free_aligned_buffer_page_end(orig_y);
@@ -1867,12 +1866,12 @@ static int TestBlur(int width,
MaskCpuFlags(disable_cpu_flags);
ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
- reinterpret_cast<int32*>(dst_cumsum), width * 4, width,
+ reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
invert * height, radius);
MaskCpuFlags(benchmark_cpu_info);
for (int i = 0; i < benchmark_iterations; ++i) {
ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
- reinterpret_cast<int32*>(dst_cumsum), width * 4, width,
+ reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
invert * height, radius);
}
int max_diff = 0;
@@ -1949,9 +1948,9 @@ TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
}
TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
- SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
+ SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
@@ -2046,37 +2045,38 @@ int TestHalfFloatPlane(int benchmark_width,
const int y_plane_size = benchmark_width * benchmark_height * 2;
align_buffer_page_end(orig_y, y_plane_size * 3);
- uint8* dst_opt = orig_y + y_plane_size;
- uint8* dst_c = orig_y + y_plane_size * 2;
+ uint8_t* dst_opt = orig_y + y_plane_size;
+ uint8_t* dst_c = orig_y + y_plane_size * 2;
MemRandomize(orig_y, y_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 1, y_plane_size);
for (i = 0; i < y_plane_size / 2; ++i) {
- reinterpret_cast<uint16*>(orig_y)[i] &= mask;
+ reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
}
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags);
for (j = 0; j < benchmark_iterations; j++) {
- HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
- reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, scale,
- benchmark_width, benchmark_height);
+ HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
+ reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
+ scale, benchmark_width, benchmark_height);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info);
for (j = 0; j < benchmark_iterations; j++) {
- HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
- reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2,
+ HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
+ reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
scale, benchmark_width, benchmark_height);
}
int max_diff = 0;
for (i = 0; i < y_plane_size / 2; ++i) {
- int abs_diff = abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) -
- static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i]));
+ int abs_diff =
+ abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
+ static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
@@ -2169,9 +2169,9 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
}
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
- SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
- SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
+ SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
+ SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
+ SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
align_buffer_page_end(lumacolortable, 32768);
@@ -2343,7 +2343,7 @@ static int TestARGBRect(int width,
}
const int kStride = width * bpp;
const int kSize = kStride * height;
- const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
+ const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
align_buffer_page_end(dst_argb_c, kSize + off);
align_buffer_page_end(dst_argb_opt, kSize + off);
@@ -2631,21 +2631,21 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2);
memset(dst_pixels_uv_c, 1, kPixels * 2 * 2);
- MergeUVRow_16_C(reinterpret_cast<const uint16*>(src_pixels_u),
- reinterpret_cast<const uint16*>(src_pixels_v),
- reinterpret_cast<uint16*>(dst_pixels_uv_c), 64, kPixels);
+ MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
+ reinterpret_cast<const uint16_t*>(src_pixels_v),
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
for (int i = 0; i < benchmark_iterations_; ++i) {
if (has_avx2) {
- MergeUVRow_16_AVX2(reinterpret_cast<const uint16*>(src_pixels_u),
- reinterpret_cast<const uint16*>(src_pixels_v),
- reinterpret_cast<uint16*>(dst_pixels_uv_opt), 64,
+ MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u),
+ reinterpret_cast<const uint16_t*>(src_pixels_v),
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
kPixels);
} else {
- MergeUVRow_16_C(reinterpret_cast<const uint16*>(src_pixels_u),
- reinterpret_cast<const uint16*>(src_pixels_v),
- reinterpret_cast<uint16*>(dst_pixels_uv_opt), 64,
+ MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
+ reinterpret_cast<const uint16_t*>(src_pixels_v),
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
kPixels);
}
}
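MergeUVRow_16 interleaves planar U and V samples into UV pairs, and the 64 passed above is assumed to be a plain multiplier that moves 10-bit data into the top of the 16-bit range (64 == 1 << 6). A hedged sketch, with MergeUVRow16Ref as a made-up name:

```cpp
// Sketch of what MergeUVRow_16_C is assumed to compute.
#include <cstdint>

static void MergeUVRow16Ref(const uint16_t* src_u, const uint16_t* src_v,
                            uint16_t* dst_uv, int scale, int width) {
  for (int x = 0; x < width; ++x) {
    dst_uv[2 * x + 0] = src_u[x] * scale;  // U sample, scaled.
    dst_uv[2 * x + 1] = src_v[x] * scale;  // V sample, scaled.
  }
}
```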
@@ -2673,18 +2673,18 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
memset(dst_pixels_y_opt, 0, kPixels * 2);
memset(dst_pixels_y_c, 1, kPixels * 2);
- MultiplyRow_16_C(reinterpret_cast<const uint16*>(src_pixels_y),
- reinterpret_cast<uint16*>(dst_pixels_y_c), 64, kPixels);
+ MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
+ reinterpret_cast<uint16_t*>(dst_pixels_y_c), 64, kPixels);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
for (int i = 0; i < benchmark_iterations_; ++i) {
if (has_avx2) {
- MultiplyRow_16_AVX2(reinterpret_cast<const uint16*>(src_pixels_y),
- reinterpret_cast<uint16*>(dst_pixels_y_opt), 64,
+ MultiplyRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y),
+ reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64,
kPixels);
} else {
- MultiplyRow_16_C(reinterpret_cast<const uint16*>(src_pixels_y),
- reinterpret_cast<uint16*>(dst_pixels_y_opt), 64,
+ MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
+ reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64,
kPixels);
}
}
@@ -2710,13 +2710,13 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
memset(dst_pixels_y_c, 1, kPixels);
MaskCpuFlags(disable_cpu_flags_);
- Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y),
+ Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y),
benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384,
benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
- Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y),
+ Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y),
benchmark_width_, dst_pixels_y_opt, benchmark_width_,
16384, benchmark_width_, benchmark_height_);
}
@@ -2742,26 +2742,26 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
MemRandomize(src_pixels_y, kPixels * 2);
// Clamp source range to 10 bits.
for (int i = 0; i < kPixels; ++i) {
- reinterpret_cast<uint16*>(src_pixels_y)[i] &= 1023;
+ reinterpret_cast<uint16_t*>(src_pixels_y)[i] &= 1023;
}
memset(dst_pixels_y_opt, 0, kPixels);
memset(dst_pixels_y_c, 1, kPixels);
- Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y),
+ Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
dst_pixels_y_c, 16384, kPixels);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
for (int i = 0; i < benchmark_iterations_; ++i) {
if (has_avx2) {
- Convert16To8Row_AVX2(reinterpret_cast<const uint16*>(src_pixels_y),
+ Convert16To8Row_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y),
dst_pixels_y_opt, 16384, kPixels);
} else if (has_ssse3) {
- Convert16To8Row_SSSE3(reinterpret_cast<const uint16*>(src_pixels_y),
+ Convert16To8Row_SSSE3(reinterpret_cast<const uint16_t*>(src_pixels_y),
dst_pixels_y_opt, 16384, kPixels);
} else {
- Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y),
+ Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
dst_pixels_y_opt, 16384, kPixels);
}
}
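The scale argument to Convert16To8Row is a 16.16 fixed-point multiplier, so the 16384 used above (65536 / 4) maps the 10-bit sources down to 8 bits. A sketch of the assumed C reference (Convert16To8RowRef is a hypothetical name):

```cpp
// Sketch of the assumed C reference for Convert16To8Row.
#include <cstdint>

static void Convert16To8RowRef(const uint16_t* src, uint8_t* dst,
                               int scale, int width) {
  for (int x = 0; x < width; ++x) {
    int value = (src[x] * scale) >> 16;  // 16.16 fixed-point downscale.
    dst[x] = static_cast<uint8_t>(value > 255 ? 255 : value);
  }
}
```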
@@ -2788,13 +2788,14 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
MaskCpuFlags(disable_cpu_flags_);
Convert8To16Plane(src_pixels_y, benchmark_width_,
- reinterpret_cast<uint16*>(dst_pixels_y_c), benchmark_width_,
- 1024, benchmark_width_, benchmark_height_);
+ reinterpret_cast<uint16_t*>(dst_pixels_y_c),
+ benchmark_width_, 1024, benchmark_width_,
+ benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
Convert8To16Plane(src_pixels_y, benchmark_width_,
- reinterpret_cast<uint16*>(dst_pixels_y_opt),
+ reinterpret_cast<uint16_t*>(dst_pixels_y_opt),
benchmark_width_, 1024, benchmark_width_,
benchmark_height_);
}
@@ -2820,7 +2821,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
memset(dst_pixels_y_opt, 0, kPixels * 2);
memset(dst_pixels_y_c, 1, kPixels * 2);
- Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16*>(dst_pixels_y_c),
+ Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16_t*>(dst_pixels_y_c),
1024, kPixels);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
@@ -2828,15 +2829,15 @@ TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
for (int i = 0; i < benchmark_iterations_; ++i) {
if (has_avx2) {
Convert8To16Row_AVX2(src_pixels_y,
- reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024,
+ reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
kPixels);
} else if (has_sse2) {
Convert8To16Row_SSE2(src_pixels_y,
- reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024,
+ reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
kPixels);
} else {
Convert8To16Row_C(src_pixels_y,
- reinterpret_cast<uint16*>(dst_pixels_y_opt), 1024,
+ reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
kPixels);
}
}
@@ -2861,8 +2862,8 @@ float TestScaleMaxSamples(int benchmark_width,
// NEON does multiple of 8, so round count up
const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
- uint8* dst_c = orig_y + kPixels * 4 + 16;
- uint8* dst_opt = orig_y + kPixels * 4 * 2 + 32;
+ uint8_t* dst_c = orig_y + kPixels * 4 + 16;
+ uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
// Randomize works but may contain some denormals affecting performance.
// MemRandomize(orig_y, kPixels * 4);
@@ -2929,8 +2930,8 @@ float TestScaleSumSamples(int benchmark_width,
// NEON does multiple of 8, so round count up
const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
align_buffer_page_end(orig_y, kPixels * 4 * 3);
- uint8* dst_c = orig_y + kPixels * 4;
- uint8* dst_opt = orig_y + kPixels * 4 * 2;
+ uint8_t* dst_c = orig_y + kPixels * 4;
+ uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
// Randomize works but may contain some denormals affecting performance.
// MemRandomize(orig_y, kPixels * 4);
@@ -3007,8 +3008,8 @@ float TestScaleSamples(int benchmark_width,
// NEON does multiple of 8, so round count up
const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
align_buffer_page_end(orig_y, kPixels * 4 * 3);
- uint8* dst_c = orig_y + kPixels * 4;
- uint8* dst_opt = orig_y + kPixels * 4 * 2;
+ uint8_t* dst_c = orig_y + kPixels * 4;
+ uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
// Randomize works but may contain some denormals affecting performance.
// MemRandomize(orig_y, kPixels * 4);
@@ -3070,8 +3071,8 @@ float TestCopySamples(int benchmark_width,
// NEON does multiple of 16 floats, so round count up
const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
align_buffer_page_end(orig_y, kPixels * 4 * 3);
- uint8* dst_c = orig_y + kPixels * 4;
- uint8* dst_opt = orig_y + kPixels * 4 * 2;
+ uint8_t* dst_c = orig_y + kPixels * 4;
+ uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
// Randomize works but may contain some denormals affecting performance.
// MemRandomize(orig_y, kPixels * 4);
@@ -3122,13 +3123,13 @@ TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
EXPECT_EQ(0, diff);
}
-extern "C" void GaussRow_NEON(const uint32* src, uint16* dst, int width);
-extern "C" void GaussRow_C(const uint32* src, uint16* dst, int width);
+extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
+extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
- SIMD_ALIGNED(uint32 orig_pixels[640 + 4]);
- SIMD_ALIGNED(uint16 dst_pixels_c[640]);
- SIMD_ALIGNED(uint16 dst_pixels_opt[640]);
+ SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]);
+ SIMD_ALIGNED(uint16_t dst_pixels_c[640]);
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[640]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
@@ -3156,30 +3157,30 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
}
EXPECT_EQ(dst_pixels_c[0],
- static_cast<uint16>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
- EXPECT_EQ(dst_pixels_c[639], static_cast<uint16>(10256));
+ static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
+ EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
}
-extern "C" void GaussCol_NEON(const uint16* src0,
- const uint16* src1,
- const uint16* src2,
- const uint16* src3,
- const uint16* src4,
- uint32* dst,
+extern "C" void GaussCol_NEON(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
int width);
-extern "C" void GaussCol_C(const uint16* src0,
- const uint16* src1,
- const uint16* src2,
- const uint16* src3,
- const uint16* src4,
- uint32* dst,
+extern "C" void GaussCol_C(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
int width);
TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
- SIMD_ALIGNED(uint16 orig_pixels[640 * 5]);
- SIMD_ALIGNED(uint32 dst_pixels_c[640]);
- SIMD_ALIGNED(uint32 dst_pixels_opt[640]);
+ SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]);
+ SIMD_ALIGNED(uint32_t dst_pixels_c[640]);
+ SIMD_ALIGNED(uint32_t dst_pixels_opt[640]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
@@ -3214,9 +3215,10 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
- EXPECT_EQ(dst_pixels_c[0], static_cast<uint32>(0 * 1 + 640 * 4 + 640 * 2 * 6 +
- 640 * 3 * 4 + 640 * 4 * 1));
- EXPECT_EQ(dst_pixels_c[639], static_cast<uint32>(30704));
+ EXPECT_EQ(dst_pixels_c[0],
+ static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
+ 640 * 4 * 1));
+ EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
} // namespace libyuv
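The expected values in the Gauss tests above pin down the kernel: a plain [1 4 6 4 1] weighted sum with no normalization here (any / 256 presumably happens elsewhere in the Gauss pipeline). A sketch of the assumed row reference, with GaussRowRef as a hypothetical name:

```cpp
#include <cstdint>

static void GaussRowRef(const uint32_t* src, uint16_t* dst, int width) {
  for (int i = 0; i < width; ++i) {
    dst[i] = static_cast<uint16_t>(src[i] + src[i + 1] * 4 + src[i + 2] * 6 +
                                   src[i + 3] * 4 + src[i + 4]);
  }
}
// With src[i] == i this gives dst[0] == 32 and dst[639] == 10256,
// matching the EXPECT_EQ checks in TestGaussRow_Opt.
```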
diff --git a/chromium/third_party/libyuv/unit_test/scale_argb_test.cc b/chromium/third_party/libyuv/unit_test/scale_argb_test.cc
index d11aec20476..a1be85b8d8a 100644
--- a/chromium/third_party/libyuv/unit_test/scale_argb_test.cc
+++ b/chromium/third_party/libyuv/unit_test/scale_argb_test.cc
@@ -37,7 +37,7 @@ static int ARGBTestFilter(int src_width,
int i, j;
const int b = 0; // 128 to test for padding/stride.
- int64 src_argb_plane_size =
+ int64_t src_argb_plane_size =
(Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL;
int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
@@ -48,7 +48,8 @@ static int ARGBTestFilter(int src_width,
}
MemRandomize(src_argb, src_argb_plane_size);
- int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL;
+ int64_t dst_argb_plane_size =
+ (dst_width + b * 2) * (dst_height + b * 2) * 4LL;
int dst_stride_argb = (b * 2 + dst_width) * 4;
align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
@@ -116,11 +117,11 @@ static int ARGBTestFilter(int src_width,
static const int kTileX = 8;
static const int kTileY = 8;
-static int TileARGBScale(const uint8* src_argb,
+static int TileARGBScale(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
@@ -157,7 +158,7 @@ static int ARGBClipTestFilter(int src_width,
}
const int b = 128;
- int64 src_argb_plane_size =
+ int64_t src_argb_plane_size =
(Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4;
int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
@@ -168,7 +169,7 @@ static int ARGBClipTestFilter(int src_width,
}
memset(src_argb, 1, src_argb_plane_size);
- int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
+ int64_t dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
int dst_stride_argb = (b * 2 + dst_width) * 4;
int i, j;
@@ -310,19 +311,20 @@ TEST_SCALETO(ARGBScale, 1280, 720)
#undef TEST_SCALETO
// Scale with YUV conversion to ARGB and clipping.
+// TODO(fbarchard): Add fourcc support. All 4 ARGB formats are easy to support.
LIBYUV_API
-int YUVToARGBScaleReference2(const uint8* src_y,
+int YUVToARGBScaleReference2(const uint8_t* src_y,
int src_stride_y,
- const uint8* src_u,
+ const uint8_t* src_u,
int src_stride_u,
- const uint8* src_v,
+ const uint8_t* src_v,
int src_stride_v,
- uint32 /* src_fourcc */, // TODO: Add support.
+ uint32 /* src_fourcc */,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
- uint32 /* dst_fourcc */, // TODO: Add support.
+ uint32 /* dst_fourcc */,
int dst_width,
int dst_height,
int clip_x,
@@ -330,7 +332,8 @@ int YUVToARGBScaleReference2(const uint8* src_y,
int clip_width,
int clip_height,
enum FilterMode filtering) {
- uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4));
+ uint8_t* argb_buffer =
+ static_cast<uint8_t*>(malloc(src_width * src_height * 4));
int r;
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
argb_buffer, src_width * 4, src_width, src_height);
@@ -342,7 +345,12 @@ int YUVToARGBScaleReference2(const uint8* src_y,
return r;
}
-static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) {
+static void FillRamp(uint8_t* buf,
+ int width,
+ int height,
+ int v,
+ int dx,
+ int dy) {
int rv = v;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
@@ -369,8 +377,8 @@ static int YUVToARGBTestFilter(int src_width,
int dst_height,
FilterMode f,
int benchmark_iterations) {
- int64 src_y_plane_size = Abs(src_width) * Abs(src_height);
- int64 src_uv_plane_size =
+ int64_t src_y_plane_size = Abs(src_width) * Abs(src_height);
+ int64_t src_uv_plane_size =
((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
int src_stride_y = Abs(src_width);
int src_stride_uv = (Abs(src_width) + 1) / 2;
@@ -379,7 +387,7 @@ static int YUVToARGBTestFilter(int src_width,
align_buffer_page_end(src_u, src_uv_plane_size);
align_buffer_page_end(src_v, src_uv_plane_size);
- int64 dst_argb_plane_size = (dst_width) * (dst_height)*4LL;
+ int64_t dst_argb_plane_size = (dst_width) * (dst_height)*4LL;
int dst_stride_argb = (dst_width)*4;
align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
diff --git a/chromium/third_party/libyuv/unit_test/scale_test.cc b/chromium/third_party/libyuv/unit_test/scale_test.cc
index c39211a161b..08b6cffaa26 100644
--- a/chromium/third_party/libyuv/unit_test/scale_test.cc
+++ b/chromium/third_party/libyuv/unit_test/scale_test.cc
@@ -38,8 +38,8 @@ static int TestFilter(int src_width,
int src_width_uv = (Abs(src_width) + 1) >> 1;
int src_height_uv = (Abs(src_height) + 1) >> 1;
- int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
- int64 src_uv_plane_size = (src_width_uv) * (src_height_uv);
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
int src_stride_y = Abs(src_width);
int src_stride_uv = src_width_uv;
@@ -58,8 +58,8 @@ static int TestFilter(int src_width,
int dst_width_uv = (dst_width + 1) >> 1;
int dst_height_uv = (dst_height + 1) >> 1;
- int64 dst_y_plane_size = (dst_width) * (dst_height);
- int64 dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+ int64_t dst_y_plane_size = (dst_width) * (dst_height);
+ int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
int dst_stride_y = dst_width;
int dst_stride_uv = dst_width_uv;
@@ -157,8 +157,8 @@ static int TestFilter_16(int src_width,
int src_width_uv = (Abs(src_width) + 1) >> 1;
int src_height_uv = (Abs(src_height) + 1) >> 1;
- int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
- int64 src_uv_plane_size = (src_width_uv) * (src_height_uv);
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
int src_stride_y = Abs(src_width);
int src_stride_uv = src_width_uv;
@@ -173,9 +173,9 @@ static int TestFilter_16(int src_width,
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
- uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
- uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
- uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
+ uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+ uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
+ uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
MemRandomize(src_y, src_y_plane_size);
MemRandomize(src_u, src_uv_plane_size);
@@ -205,9 +205,9 @@ static int TestFilter_16(int src_width,
align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
- uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
- uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
- uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
+ uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+ uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
+ uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
@@ -345,9 +345,9 @@ TEST_SCALETO(Scale, 1280, 720)
#ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
- SIMD_ALIGNED(uint8 orig_pixels[128 * 2]);
- SIMD_ALIGNED(uint8 dst_pixels_opt[64]);
- SIMD_ALIGNED(uint8 dst_pixels_c[64]);
+ SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
+ SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
+ SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
@@ -433,19 +433,19 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
}
#endif // HAS_SCALEROWDOWN2_SSSE3
-extern "C" void ScaleRowUp2_16_NEON(const uint16* src_ptr,
+extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
-extern "C" void ScaleRowUp2_16_C(const uint16* src_ptr,
+extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
- SIMD_ALIGNED(uint16 orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
- SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
- SIMD_ALIGNED(uint16 dst_pixels_c[1280]);
+ SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
+ SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
@@ -475,15 +475,15 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
EXPECT_EQ(dst_pixels_c[1279], 800);
}
-extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
+extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
- uint16* dst,
+ uint16_t* dst,
int dst_width);
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
- SIMD_ALIGNED(uint16 orig_pixels[2560 * 2]);
- SIMD_ALIGNED(uint16 dst_pixels_c[1280]);
- SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
+ SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
+ SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
@@ -530,7 +530,7 @@ static int TestPlaneFilter_16(int src_width,
}
int i;
- int64 src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
int src_stride_y = Abs(src_width);
int dst_y_plane_size = dst_width * dst_height;
int dst_stride_y = dst_width;
@@ -539,8 +539,8 @@ static int TestPlaneFilter_16(int src_width,
align_buffer_page_end(src_y_16, src_y_plane_size * 2);
align_buffer_page_end(dst_y_8, dst_y_plane_size);
align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
- uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
- uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
+ uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+ uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
MemRandomize(src_y, src_y_plane_size);
memset(dst_y_8, 0, dst_y_plane_size);
diff --git a/chromium/third_party/libyuv/unit_test/unit_test.cc b/chromium/third_party/libyuv/unit_test/unit_test.cc
index c2d7a1db7b5..20aadb44e2f 100644
--- a/chromium/third_party/libyuv/unit_test/unit_test.cc
+++ b/chromium/third_party/libyuv/unit_test/unit_test.cc
@@ -31,11 +31,11 @@ DEFINE_int32(libyuv_cpu_info,
"cpu flags for benchmark code. 1 = C, -1 = SIMD");
#else
// Disable command line parameters if gflags disabled.
-static const int32 FLAGS_libyuv_width = 0;
-static const int32 FLAGS_libyuv_height = 0;
-static const int32 FLAGS_libyuv_repeat = 0;
-static const int32 FLAGS_libyuv_flags = 0;
-static const int32 FLAGS_libyuv_cpu_info = 0;
+static const int32_t FLAGS_libyuv_width = 0;
+static const int32_t FLAGS_libyuv_height = 0;
+static const int32_t FLAGS_libyuv_repeat = 0;
+static const int32_t FLAGS_libyuv_flags = 0;
+static const int32_t FLAGS_libyuv_cpu_info = 0;
#endif
// For quicker unittests, default is 128 x 72. But when benchmarking,
diff --git a/chromium/third_party/libyuv/unit_test/unit_test.h b/chromium/third_party/libyuv/unit_test/unit_test.h
index 6454389d52d..dee3952fdc8 100644
--- a/chromium/third_party/libyuv/unit_test/unit_test.h
+++ b/chromium/third_party/libyuv/unit_test/unit_test.h
@@ -69,10 +69,10 @@ static inline bool SizeValid(int src_width,
return true;
}
-#define align_buffer_page_end(var, size) \
- uint8* var##_mem = \
- reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \
- uint8* var = reinterpret_cast<uint8*>( \
+#define align_buffer_page_end(var, size) \
+ uint8_t* var##_mem = \
+ reinterpret_cast<uint8_t*>(malloc(((size) + 4095 + 63) & ~4095)); \
+ uint8_t* var = reinterpret_cast<uint8_t*>( \
(intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - (size)) & ~63)
#define free_aligned_buffer_page_end(var) \
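Unrolled as plain code, the align_buffer_page_end macro above over-allocates to a whole number of 4 KB pages plus 63 bytes of slack, then places the buffer so it ends within 63 bytes of the end of the allocation, 64-byte aligned; reads or writes past `size` then run off the allocation and tend to fault (or trip ASan) immediately. A sketch (AllocPageEnd is a hypothetical helper, not part of the harness):

```cpp
#include <cstdint>
#include <cstdlib>

static uint8_t* AllocPageEnd(size_t size, uint8_t** mem_out) {
  size_t rounded = (size + 4095 + 63) & ~static_cast<size_t>(4095);
  uint8_t* mem = static_cast<uint8_t*>(malloc(rounded));
  // Place the buffer near the end of the allocation, 64-byte aligned.
  uint8_t* buf = reinterpret_cast<uint8_t*>(
      reinterpret_cast<intptr_t>(mem + rounded - size) &
      ~static_cast<intptr_t>(63));
  *mem_out = mem;  // free() this, as free_aligned_buffer_page_end does.
  return buf;
}
```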
@@ -111,10 +111,10 @@ inline int fastrand() {
return static_cast<int>((fastrand_seed >> 16) & 0xffff);
}
-static inline void MemRandomize(uint8* dst, int64 len) {
- int64 i;
+static inline void MemRandomize(uint8_t* dst, int64_t len) {
+ int64_t i;
for (i = 0; i < len - 1; i += 2) {
- *reinterpret_cast<uint16*>(dst) = fastrand();
+ *reinterpret_cast<uint16_t*>(dst) = fastrand();
dst += 2;
}
for (; i < len; ++i) {
@@ -129,7 +129,6 @@ class LibYUVColorTest : public ::testing::Test {
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
@@ -142,7 +141,6 @@ class LibYUVConvertTest : public ::testing::Test {
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
@@ -155,7 +153,6 @@ class LibYUVScaleTest : public ::testing::Test {
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
@@ -168,7 +165,6 @@ class LibYUVRotateTest : public ::testing::Test {
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
@@ -181,7 +177,6 @@ class LibYUVPlanarTest : public ::testing::Test {
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
@@ -194,7 +189,6 @@ class LibYUVBaseTest : public ::testing::Test {
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
@@ -207,7 +201,6 @@ class LibYUVCompareTest : public ::testing::Test {
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
diff --git a/chromium/third_party/libyuv/unit_test/video_common_test.cc b/chromium/third_party/libyuv/unit_test/video_common_test.cc
index ba7b15a9d28..4d89586e76f 100644
--- a/chromium/third_party/libyuv/unit_test/video_common_test.cc
+++ b/chromium/third_party/libyuv/unit_test/video_common_test.cc
@@ -18,15 +18,12 @@ namespace libyuv {
// Tests FourCC codes in video common, which are used for ConvertToI420().
-static bool TestValidChar(uint32 onecc) {
- if ((onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') ||
- (onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff)) {
- return true;
- }
- return false;
+static bool TestValidChar(uint32_t onecc) {
+ return (onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') ||
+ (onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff);
}
-static bool TestValidFourCC(uint32 fourcc, int bpp) {
+static bool TestValidFourCC(uint32_t fourcc, int bpp) {
if (!TestValidChar(fourcc & 0xff) || !TestValidChar((fourcc >> 8) & 0xff) ||
!TestValidChar((fourcc >> 16) & 0xff) ||
!TestValidChar((fourcc >> 24) & 0xff)) {
@@ -39,23 +36,23 @@ static bool TestValidFourCC(uint32 fourcc, int bpp) {
}
TEST_F(LibYUVBaseTest, TestCanonicalFourCC) {
- EXPECT_EQ(static_cast<uint32>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV));
- EXPECT_EQ(static_cast<uint32>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12));
- EXPECT_EQ(static_cast<uint32>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16));
- EXPECT_EQ(static_cast<uint32>(FOURCC_I444), CanonicalFourCC(FOURCC_YU24));
- EXPECT_EQ(static_cast<uint32>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV));
- EXPECT_EQ(static_cast<uint32>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS));
- EXPECT_EQ(static_cast<uint32>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC));
- EXPECT_EQ(static_cast<uint32>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY));
- EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG));
- EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1));
- EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3));
- EXPECT_EQ(static_cast<uint32>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3));
- EXPECT_EQ(static_cast<uint32>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32));
- EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24));
- EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555));
- EXPECT_EQ(static_cast<uint32>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565));
- EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_I444), CanonicalFourCC(FOURCC_YU24));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565));
+ EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551));
}
TEST_F(LibYUVBaseTest, TestFourCC) {
@@ -74,6 +71,7 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA));
EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR));
EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30));
EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG));
EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA));
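For reference, a FourCC packs four ASCII characters into a uint32_t, low byte first; MakeFourCC below is a sketch mirroring the packing libyuv's FOURCC macro is assumed to use, and TestValidChar then just checks that each of the four bytes is alphanumeric, a space, or 0xff:

```cpp
#include <cstdint>

constexpr uint32_t MakeFourCC(char a, char b, char c, char d) {
  return static_cast<uint32_t>(a) | (static_cast<uint32_t>(b) << 8) |
         (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24);
}
static_assert(MakeFourCC('I', '4', '2', '0') == 0x30323449u,
              "'I420' packs low byte first");
```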
diff --git a/chromium/third_party/libyuv/util/compare.cc b/chromium/third_party/libyuv/util/compare.cc
index ef0beefafee..a16613ee2f9 100644
--- a/chromium/third_party/libyuv/util/compare.cc
+++ b/chromium/third_party/libyuv/util/compare.cc
@@ -29,22 +29,24 @@ int main(int argc, char** argv) {
FILE* fin2 = name2 ? fopen(name2, "rb") : NULL;
const int kBlockSize = 32768;
- uint8 buf1[kBlockSize];
- uint8 buf2[kBlockSize];
- uint32 hash1 = 5381;
- uint32 hash2 = 5381;
- uint64 sum_square_err = 0;
- uint64 size_min = 0;
+ uint8_t buf1[kBlockSize];
+ uint8_t buf2[kBlockSize];
+ uint32_t hash1 = 5381;
+ uint32_t hash2 = 5381;
+ uint64_t sum_square_err = 0;
+ uint64_t size_min = 0;
int amt1 = 0;
int amt2 = 0;
do {
amt1 = static_cast<int>(fread(buf1, 1, kBlockSize, fin1));
- if (amt1 > 0)
+ if (amt1 > 0) {
hash1 = libyuv::HashDjb2(buf1, amt1, hash1);
+ }
if (fin2) {
amt2 = static_cast<int>(fread(buf2, 1, kBlockSize, fin2));
- if (amt2 > 0)
+ if (amt2 > 0) {
hash2 = libyuv::HashDjb2(buf2, amt2, hash2);
+ }
int amt_min = (amt1 < amt2) ? amt1 : amt2;
size_min += amt_min;
sum_square_err += libyuv::ComputeSumSquareError(buf1, buf2, amt_min);
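The 5381 seeds above are the classic djb2 initial value. A sketch of the recurrence libyuv::HashDjb2 is assumed to compute (HashDjb2Ref is a made-up name; the real function is SIMD-accelerated):

```cpp
#include <cstdint>

static uint32_t HashDjb2Ref(const uint8_t* src, uint64_t count,
                            uint32_t seed) {
  uint32_t hash = seed;
  for (uint64_t i = 0; i < count; ++i) {
    hash = hash * 33 + src[i];  // djb2: hash = hash * 33 + byte.
  }
  return hash;
}
```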
diff --git a/chromium/third_party/libyuv/util/psnr.cc b/chromium/third_party/libyuv/util/psnr.cc
index 27f876c0b4a..f54015bab82 100644
--- a/chromium/third_party/libyuv/util/psnr.cc
+++ b/chromium/third_party/libyuv/util/psnr.cc
@@ -21,14 +21,14 @@
extern "C" {
#endif
-typedef unsigned int uint32; // NOLINT
+typedef unsigned int uint32_t; // NOLINT
#ifdef _MSC_VER
-typedef unsigned __int64 uint64;
+typedef unsigned __int64 uint64_t;
#else // COMPILER_MSVC
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long uint64; // NOLINT
+typedef unsigned long uint64_t; // NOLINT
#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long long uint64; // NOLINT
+typedef unsigned long long uint64_t; // NOLINT
#endif // __LP64__
#endif // _MSC_VER
@@ -38,10 +38,10 @@ typedef unsigned long long uint64; // NOLINT
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
#define HAS_SUMSQUAREERROR_NEON
-static uint32 SumSquareError_NEON(const uint8* src_a,
- const uint8* src_b,
- int count) {
- volatile uint32 sse;
+static uint32_t SumSquareError_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ volatile uint32_t sse;
asm volatile(
"vmov.u8 q7, #0 \n"
"vmov.u8 q9, #0 \n"
@@ -73,10 +73,10 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
}
#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_SUMSQUAREERROR_NEON
-static uint32 SumSquareError_NEON(const uint8* src_a,
- const uint8* src_b,
- int count) {
- volatile uint32 sse;
+static uint32_t SumSquareError_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ volatile uint32_t sse;
asm volatile(
"eor v16.16b, v16.16b, v16.16b \n"
"eor v18.16b, v18.16b, v18.16b \n"
@@ -107,9 +107,9 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
}
#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_SUMSQUAREERROR_SSE2
-__declspec(naked) static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
- const uint8* /*src_b*/,
- int /*count*/) {
+__declspec(naked) static uint32_t SumSquareError_SSE2(const uint8_t* /*src_a*/,
+ const uint8_t* /*src_b*/,
+ int /*count*/) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
@@ -146,10 +146,10 @@ __declspec(naked) static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
}
#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
-static uint32 SumSquareError_SSE2(const uint8* src_a,
- const uint8* src_b,
- int count) {
- uint32 sse;
+static uint32_t SumSquareError_SSE2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse;
asm volatile( // NOLINT
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
@@ -228,22 +228,22 @@ static int CpuHasSSE2() {
}
#endif // HAS_SUMSQUAREERROR_SSE2
-static uint32 SumSquareError_C(const uint8* src_a,
- const uint8* src_b,
- int count) {
- uint32 sse = 0u;
+static uint32_t SumSquareError_C(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse = 0u;
for (int x = 0; x < count; ++x) {
int diff = src_a[x] - src_b[x];
- sse += static_cast<uint32>(diff * diff);
+ sse += static_cast<uint32_t>(diff * diff);
}
return sse;
}
-double ComputeSumSquareError(const uint8* src_a,
- const uint8* src_b,
+double ComputeSumSquareError(const uint8_t* src_a,
+ const uint8_t* src_b,
int count) {
- uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
- SumSquareError_C;
+ uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
+ int count) = SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
SumSquareError = SumSquareError_NEON;
#endif
@@ -253,7 +253,7 @@ double ComputeSumSquareError(const uint8* src_a,
}
#endif
const int kBlockSize = 1 << 15;
- uint64 sse = 0;
+ uint64_t sse = 0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sse)
#endif
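A note on why kBlockSize = 1 << 15 is safe for the per-block uint32_t accumulator: the worst per-byte squared error is 255 * 255, and 32768 of those still fit in 32 bits, so a block's partial sum cannot overflow before it is widened into the uint64_t total. As a sanity check (not library code):

```cpp
#include <cstdint>

static_assert(static_cast<uint64_t>(255) * 255 * (1 << 15) <= UINT32_MAX,
              "one block of squared errors fits in 32 bits");
```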
@@ -280,8 +280,9 @@ double ComputeSumSquareError(const uint8* src_a,
// Returns 128.0 (kMaxPSNR) if sse is 0 (perfect match).
double ComputePSNR(double sse, double size) {
const double kMINSSE = 255.0 * 255.0 * size / pow(10.0, kMaxPSNR / 10.0);
- if (sse <= kMINSSE)
+ if (sse <= kMINSSE) {
sse = kMINSSE; // Produces max PSNR of 128
+ }
return 10.0 * log10(255.0 * 255.0 * size / sse);
}
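A worked example of the formula above: if the total squared error is 1/1000th of the 255^2 * size ceiling, PSNR is 10 * log10(1000) = 30 dB. A throwaway demo:

```cpp
#include <cmath>
#include <cstdio>

int main() {
  const double size = 100.0;                         // Pixels compared.
  const double sse = 255.0 * 255.0 * size / 1000.0;  // 6502.5.
  printf("%.1f dB\n", 10.0 * log10(255.0 * 255.0 * size / sse));  // 30.0
  return 0;
}
```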
diff --git a/chromium/third_party/libyuv/util/psnr.h b/chromium/third_party/libyuv/util/psnr.h
index 0816b976001..aac128cbca8 100644
--- a/chromium/third_party/libyuv/util/psnr.h
+++ b/chromium/third_party/libyuv/util/psnr.h
@@ -20,7 +20,7 @@ extern "C" {
#endif
#if !defined(INT_TYPES_DEFINED) && !defined(UINT8_TYPE_DEFINED)
-typedef unsigned char uint8;
+typedef unsigned char uint8_t;
#define UINT8_TYPE_DEFINED
#endif
@@ -31,7 +31,9 @@ static const double kMaxPSNR = 128.0;
#if !defined(HAVE_JPEG)
// Compute Sum of Squared Error (SSE).
// Pass this to ComputePSNR for final result.
-double ComputeSumSquareError(const uint8* org, const uint8* rec, int size);
+double ComputeSumSquareError(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
#endif
// PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse)
diff --git a/chromium/third_party/libyuv/util/psnr_main.cc b/chromium/third_party/libyuv/util/psnr_main.cc
index 4d930be4aed..a930b202ecf 100644
--- a/chromium/third_party/libyuv/util/psnr_main.cc
+++ b/chromium/third_party/libyuv/util/psnr_main.cc
@@ -90,9 +90,9 @@ bool ExtractResolutionFromFilename(const char* name,
fseek(file_org, 0, SEEK_END);
size_t total_size = ftell(file_org);
fseek(file_org, 0, SEEK_SET);
- uint8* const ch_org = new uint8[total_size];
+ uint8_t* const ch_org = new uint8_t[total_size];
memset(ch_org, 0, total_size);
- size_t bytes_org = fread(ch_org, sizeof(uint8), total_size, file_org);
+ size_t bytes_org = fread(ch_org, sizeof(uint8_t), total_size, file_org);
fclose(file_org);
if (bytes_org == total_size) {
if (0 == libyuv::MJPGSize(ch_org, total_size, width_ptr, height_ptr)) {
@@ -107,13 +107,15 @@ bool ExtractResolutionFromFilename(const char* name,
// Scale Y channel from 16..240 to 0..255.
// This can be useful when comparing codecs that are inconsistent about Y
-uint8 ScaleY(uint8 y) {
+uint8_t ScaleY(uint8_t y) {
int ny = (y - 16) * 256 / 224;
- if (ny < 0)
+ if (ny < 0) {
ny = 0;
- if (ny > 255)
+ }
+ if (ny > 255) {
ny = 255;
- return static_cast<uint8>(ny);
+ }
+ return static_cast<uint8_t>(ny);
}
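A quick endpoint check for ScaleY above (CheckScaleY is a throwaway helper, not part of the tool): 16 maps to 0, and 240 computes to 256 before the clamp, so it comes out as 255.

```cpp
#include <cassert>
#include <cstdint>

uint8_t ScaleY(uint8_t y);  // Defined above in psnr_main.cc.

static void CheckScaleY() {
  assert(ScaleY(16) == 0);     // Bottom of the 16..240 studio range.
  assert(ScaleY(128) == 128);  // (128 - 16) * 256 / 224 == 128.
  assert(ScaleY(240) == 255);  // Computes 256, then clamps.
}
```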
// MSE = Mean Square Error
@@ -150,8 +152,9 @@ void PrintHelp(const char* program) {
}
void ParseOptions(int argc, const char* argv[]) {
- if (argc <= 1)
+ if (argc <= 1) {
PrintHelp(argv[0]);
+ }
for (int c = 1; c < argc; ++c) {
if (!strcmp(argv[c], "-v")) {
verbose = true;
@@ -237,8 +240,8 @@ void ParseOptions(int argc, const char* argv[]) {
}
}
-bool UpdateMetrics(uint8* ch_org,
- uint8* ch_rec,
+bool UpdateMetrics(uint8_t* ch_org,
+ uint8_t* ch_rec,
const int y_size,
const int uv_size,
const size_t total_size,
@@ -247,10 +250,10 @@ bool UpdateMetrics(uint8* ch_org,
metric* distorted_frame,
bool do_psnr) {
const int uv_offset = (do_swap_uv ? uv_size : 0);
- const uint8* const u_org = ch_org + y_size + uv_offset;
- const uint8* const u_rec = ch_rec + y_size;
- const uint8* const v_org = ch_org + y_size + (uv_size - uv_offset);
- const uint8* const v_rec = ch_rec + y_size + uv_size;
+ const uint8_t* const u_org = ch_org + y_size + uv_offset;
+ const uint8_t* const u_rec = ch_rec + y_size;
+ const uint8_t* const v_org = ch_org + y_size + (uv_size - uv_offset);
+ const uint8_t* const v_rec = ch_rec + y_size + uv_size;
if (do_psnr) {
#ifdef HAVE_JPEG
double y_err = static_cast<double>(
@@ -301,12 +304,15 @@ bool UpdateMetrics(uint8* ch_org,
cur_distortion_psnr->all += distorted_frame->all;
bool ismin = false;
- if (distorted_frame->y < cur_distortion_psnr->min_y)
+ if (distorted_frame->y < cur_distortion_psnr->min_y) {
cur_distortion_psnr->min_y = distorted_frame->y;
- if (distorted_frame->u < cur_distortion_psnr->min_u)
+ }
+ if (distorted_frame->u < cur_distortion_psnr->min_u) {
cur_distortion_psnr->min_u = distorted_frame->u;
- if (distorted_frame->v < cur_distortion_psnr->min_v)
+ }
+ if (distorted_frame->v < cur_distortion_psnr->min_v) {
cur_distortion_psnr->min_v = distorted_frame->v;
+ }
if (distorted_frame->all < cur_distortion_psnr->min_all) {
cur_distortion_psnr->min_all = distorted_frame->all;
cur_distortion_psnr->min_frame = number_of_frames;
@@ -374,8 +380,8 @@ int main(int argc, const char* argv[]) {
#endif
}
- uint8* const ch_org = new uint8[total_size];
- uint8* const ch_rec = new uint8[total_size];
+ uint8_t* const ch_org = new uint8_t[total_size];
+ uint8_t* const ch_rec = new uint8_t[total_size];
if (ch_org == NULL || ch_rec == NULL) {
fprintf(stderr, "No memory available\n");
fclose(file_org);
@@ -429,14 +435,15 @@ int main(int argc, const char* argv[]) {
int number_of_frames;
for (number_of_frames = 0;; ++number_of_frames) {
- if (num_frames && number_of_frames >= num_frames)
+ if (num_frames && number_of_frames >= num_frames) {
break;
+ }
- size_t bytes_org = fread(ch_org, sizeof(uint8), total_size, file_org);
+ size_t bytes_org = fread(ch_org, sizeof(uint8_t), total_size, file_org);
if (bytes_org < total_size) {
#ifdef HAVE_JPEG
// Try parsing file as a jpeg.
- uint8* const ch_jpeg = new uint8[bytes_org];
+ uint8_t* const ch_jpeg = new uint8_t[bytes_org];
memcpy(ch_jpeg, ch_org, bytes_org);
memset(ch_org, 0, total_size);
@@ -456,11 +463,11 @@ int main(int argc, const char* argv[]) {
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
size_t bytes_rec =
- fread(ch_rec, sizeof(uint8), total_size, file_rec[cur_rec]);
+ fread(ch_rec, sizeof(uint8_t), total_size, file_rec[cur_rec]);
if (bytes_rec < total_size) {
#ifdef HAVE_JPEG
// Try parsing file as a jpeg.
- uint8* const ch_jpeg = new uint8[bytes_rec];
+ uint8_t* const ch_jpeg = new uint8_t[bytes_rec];
memcpy(ch_jpeg, ch_rec, bytes_rec);
memset(ch_rec, 0, total_size);
@@ -482,7 +489,7 @@ int main(int argc, const char* argv[]) {
printf("%5d", number_of_frames);
}
if (do_psnr) {
- metric distorted_frame;
+ metric distorted_frame = {};
metric* cur_distortion_psnr = &distortion_psnr[cur_rec];
bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size,
number_of_frames, cur_distortion_psnr,
@@ -496,7 +503,7 @@ int main(int argc, const char* argv[]) {
}
}
if (do_ssim) {
- metric distorted_frame;
+ metric distorted_frame = {};
metric* cur_distortion_ssim = &distortion_ssim[cur_rec];
bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size,
number_of_frames, cur_distortion_ssim,
diff --git a/chromium/third_party/libyuv/util/ssim.cc b/chromium/third_party/libyuv/util/ssim.cc
index 43e725d8210..096fbcf0610 100644
--- a/chromium/third_party/libyuv/util/ssim.cc
+++ b/chromium/third_party/libyuv/util/ssim.cc
@@ -16,8 +16,8 @@
extern "C" {
#endif
-typedef unsigned int uint32; // NOLINT
-typedef unsigned short uint16; // NOLINT
+typedef unsigned int uint32_t; // NOLINT
+typedef unsigned short uint16_t; // NOLINT
#if !defined(LIBYUV_DISABLE_X86) && !defined(__SSE2__) && \
(defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)))
@@ -50,7 +50,7 @@ static const double kiW[KERNEL + 1 + 1] = {
#if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__)
-#define PWEIGHT(A, B) static_cast<uint16>(K[(A)] * K[(B)]) // weight product
+#define PWEIGHT(A, B) static_cast<uint16_t>(K[(A)] * K[(B)]) // weight product
#define MAKE_WEIGHT(L) \
{ \
{ \
@@ -66,7 +66,7 @@ static const double kiW[KERNEL + 1 + 1] = {
// values. We can't call _mm_set_epi16() for static compile-time initialization.
static const struct {
union {
- uint16 i16_[8];
+ uint16_t i16_[8];
__m128i m_;
} values_;
} W0 = MAKE_WEIGHT(0), W1 = MAKE_WEIGHT(1), W2 = MAKE_WEIGHT(2),
@@ -88,10 +88,12 @@ static double FinalizeSSIM(double iw,
double sxx = xxm * iw - iwx * iwx;
double syy = yym * iw - iwy * iwy;
  // Small errors are possible due to rounding. Clamp to zero.
- if (sxx < 0.)
+ if (sxx < 0.) {
sxx = 0.;
- if (syy < 0.)
+ }
+ if (syy < 0.) {
syy = 0.;
+ }
const double sxsy = sqrt(sxx * syy);
const double sxy = xym * iw - iwx * iwy;
static const double C11 = (0.01 * 0.01) * (255 * 255);
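FinalizeSSIM is assumed to plug the accumulated moments into the standard SSIM score; a sketch with hypothetical names (mx, my are the weighted means, sxx/syy/sxy the weighted (co)variances):

```cpp
static double SsimRef(double mx, double my, double sxx, double syy,
                      double sxy) {
  const double c1 = (0.01 * 255) * (0.01 * 255);  // Matches C11 above.
  const double c2 = (0.03 * 255) * (0.03 * 255);  // Conventional C2 term.
  return ((2 * mx * my + c1) * (2 * sxy + c2)) /
         ((mx * mx + my * my + c1) * (sxx + syy + c2));
}
```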
@@ -109,21 +111,22 @@ static double FinalizeSSIM(double iw,
// Note: worst case of accumulation is a weight of 33 = 11 + 2 * (7 + 3 + 1)
// with a diff of 255, squared. The maximum error is thus 0x4388241,
// which fits into 32 bits integers.
-double GetSSIM(const uint8* org,
- const uint8* rec,
+double GetSSIM(const uint8_t* org,
+ const uint8_t* rec,
int xo,
int yo,
int W,
int H,
int stride) {
- uint32 ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
+ uint32_t ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
org += (yo - KERNEL) * stride;
org += (xo - KERNEL);
rec += (yo - KERNEL) * stride;
rec += (xo - KERNEL);
for (int y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride, rec += stride) {
- if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H))
+ if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) {
continue;
+ }
const int Wy = K[y_];
for (int x_ = 0; x_ < KERNEL_SIZE; ++x_) {
const int Wxy = Wy * K[x_];
@@ -142,13 +145,13 @@ double GetSSIM(const uint8* org,
return FinalizeSSIM(1. / ws, xm, ym, xxm, xym, yym);
}
-double GetSSIMFullKernel(const uint8* org,
- const uint8* rec,
+double GetSSIMFullKernel(const uint8_t* org,
+ const uint8_t* rec,
int xo,
int yo,
int stride,
double area_weight) {
- uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
+ uint32_t xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
#if defined(LIBYUV_DISABLE_X86) || !defined(__SSE2__)
@@ -262,7 +265,7 @@ double GetSSIMFullKernel(const uint8* org,
#define ADD_AND_STORE_FOUR_EPI32(M, OUT) \
do { \
- uint32 tmp[4]; \
+ uint32_t tmp[4]; \
_mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \
(OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \
} while (0)
@@ -292,8 +295,8 @@ static int start_max(int x, int y) {
return (x > y) ? x : y;
}
-double CalcSSIM(const uint8* org,
- const uint8* rec,
+double CalcSSIM(const uint8_t* org,
+ const uint8_t* rec,
const int image_width,
const int image_height) {
double SSIM = 0.;
@@ -328,8 +331,8 @@ double CalcSSIM(const uint8* org,
// NOTE: we could use similar method for the left-most pixels too.
const int kScratchWidth = 8;
const int kScratchStride = kScratchWidth + KERNEL + 1;
- uint8 scratch_org[KERNEL_SIZE * kScratchStride] = {0};
- uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = {0};
+ uint8_t scratch_org[KERNEL_SIZE * kScratchStride] = {0};
+ uint8_t scratch_rec[KERNEL_SIZE * kScratchStride] = {0};
for (int k = 0; k < KERNEL_SIZE; ++k) {
const int offset =
diff --git a/chromium/third_party/libyuv/util/ssim.h b/chromium/third_party/libyuv/util/ssim.h
index 4647f45de14..a855f1d1233 100644
--- a/chromium/third_party/libyuv/util/ssim.h
+++ b/chromium/third_party/libyuv/util/ssim.h
@@ -20,12 +20,12 @@ extern "C" {
#endif
#if !defined(INT_TYPES_DEFINED) && !defined(UINT8_TYPE_DEFINED)
-typedef unsigned char uint8;
+typedef unsigned char uint8_t;
#define UINT8_TYPE_DEFINED
#endif
-double CalcSSIM(const uint8* org,
- const uint8* rec,
+double CalcSSIM(const uint8_t* org,
+ const uint8_t* rec,
const int image_width,
const int image_height);
diff --git a/chromium/third_party/libyuv/util/yuvconvert.cc b/chromium/third_party/libyuv/util/yuvconvert.cc
index bc01d9ff503..27cdfe9e375 100644
--- a/chromium/third_party/libyuv/util/yuvconvert.cc
+++ b/chromium/third_party/libyuv/util/yuvconvert.cc
@@ -37,7 +37,7 @@ int num_skip_org = 0; // Number of frames to skip in original.
int num_frames = 0; // Number of frames to convert.
int filter = 1; // Bilinear filter for scaling.
-static __inline uint32 Abs(int32 v) {
+static __inline uint32_t Abs(int32_t v) {
return v >= 0 ? v : -v;
}
@@ -79,8 +79,9 @@ void PrintHelp(const char* program) {
}
void ParseOptions(int argc, const char* argv[]) {
- if (argc <= 1)
+ if (argc <= 1) {
PrintHelp(argv[0]);
+ }
for (int c = 1; c < argc; ++c) {
if (!strcmp(argv[c], "-v")) {
verbose = true;
@@ -158,11 +159,11 @@ void ParseOptions(int argc, const char* argv[]) {
static const int kTileX = 32;
static const int kTileY = 32;
-static int TileARGBScale(const uint8* src_argb,
+static int TileARGBScale(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
- uint8* dst_argb,
+ uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
@@ -242,9 +243,9 @@ int main(int argc, const char* argv[]) {
fseek(file_org, num_skip_org * total_size, SEEK_SET);
#endif
- uint8* const ch_org = new uint8[org_size];
- uint8* const ch_dst = new uint8[dst_size];
- uint8* const ch_rec = new uint8[total_size];
+ uint8_t* const ch_org = new uint8_t[org_size];
+ uint8_t* const ch_dst = new uint8_t[dst_size];
+ uint8_t* const ch_rec = new uint8_t[total_size];
if (ch_org == NULL || ch_rec == NULL) {
fprintf(stderr, "No memory available\n");
fclose(file_org);
@@ -265,14 +266,16 @@ int main(int argc, const char* argv[]) {
int number_of_frames;
for (number_of_frames = 0;; ++number_of_frames) {
- if (num_frames && number_of_frames >= num_frames)
+ if (num_frames && number_of_frames >= num_frames) {
break;
+ }
// Load original YUV or ARGB frame.
size_t bytes_org =
- fread(ch_org, sizeof(uint8), static_cast<size_t>(org_size), file_org);
- if (bytes_org < static_cast<size_t>(org_size))
+ fread(ch_org, sizeof(uint8_t), static_cast<size_t>(org_size), file_org);
+ if (bytes_org < static_cast<size_t>(org_size)) {
break;
+ }
    // TODO(fbarchard): Attenuate doesn't need to know dimensions.
// ARGB attenuate frame
@@ -329,16 +332,18 @@ int main(int argc, const char* argv[]) {
// Output YUV or ARGB frame.
if (rec_is_yuv) {
size_t bytes_rec =
- fwrite(ch_rec, sizeof(uint8), static_cast<size_t>(total_size),
+ fwrite(ch_rec, sizeof(uint8_t), static_cast<size_t>(total_size),
file_rec[cur_rec]);
- if (bytes_rec < static_cast<size_t>(total_size))
+ if (bytes_rec < static_cast<size_t>(total_size)) {
break;
+ }
} else {
size_t bytes_rec =
- fwrite(ch_dst, sizeof(uint8), static_cast<size_t>(dst_size),
+ fwrite(ch_dst, sizeof(uint8_t), static_cast<size_t>(dst_size),
file_rec[cur_rec]);
- if (bytes_rec < static_cast<size_t>(dst_size))
+ if (bytes_rec < static_cast<size_t>(dst_size)) {
break;
+ }
}
if (verbose) {
printf("%5d", number_of_frames);