summary | refs | log | tree | commit | diff
path: root/chromium/third_party/libyuv/source/scale_neon64.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/libyuv/source/scale_neon64.cc')
-rw-r--r-- chromium/third_party/libyuv/source/scale_neon64.cc 42
1 files changed, 15 insertions, 27 deletions
diff --git a/chromium/third_party/libyuv/source/scale_neon64.cc b/chromium/third_party/libyuv/source/scale_neon64.cc
index 494a9cfbfbe..f4aed5fc92f 100644
--- a/chromium/third_party/libyuv/source/scale_neon64.cc
+++ b/chromium/third_party/libyuv/source/scale_neon64.cc
@@ -515,37 +515,25 @@ void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
"v19", "v30", "v31", "memory", "cc");
}
-void ScaleAddRows_NEON(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst_ptr,
- int src_width,
- int src_height) {
- const uint8_t* src_tmp;
+// Add a row of bytes to a row of shorts. Used for box filter.
+// Reads 16 bytes and accumulates to 16 shorts at a time.
+void ScaleAddRow_NEON(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width) {
asm volatile(
"1: \n"
- "mov %0, %1 \n"
- "mov w12, %w5 \n"
- "eor v2.16b, v2.16b, v2.16b \n"
- "eor v3.16b, v3.16b, v3.16b \n"
- "2: \n"
- // load 16 pixels into q0
- "ld1 {v0.16b}, [%0], %3 \n"
- "uaddw2 v3.8h, v3.8h, v0.16b \n"
- "uaddw v2.8h, v2.8h, v0.8b \n"
- "subs w12, w12, #1 \n"
- "b.gt 2b \n"
- "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels
- "add %1, %1, #16 \n"
- "subs %w4, %w4, #16 \n" // 16 processed per loop
+ "ld1 {v1.8h, v2.8h}, [%1] \n" // load accumulator
+ "ld1 {v0.16b}, [%0], #16 \n" // load 16 bytes
+ "uaddw2 v2.8h, v2.8h, v0.16b \n" // add
+ "uaddw v1.8h, v1.8h, v0.8b \n"
+ "st1 {v1.8h, v2.8h}, [%1], #32 \n" // store accumulator
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
"b.gt 1b \n"
- : "=&r"(src_tmp), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_ptr), // %2
- "+r"(src_stride), // %3
- "+r"(src_width), // %4
- "+r"(src_height) // %5
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(src_width) // %2
:
- : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List
+ : "memory", "cc", "v0", "v1", "v2" // Clobber List
);
}