summary | refs | log | tree | commit | diff
path: root/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c')
-rw-r--r-- chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c | 20
1 files changed, 9 insertions, 11 deletions
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
index bff98cbc169..673a36840e3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
@@ -13,13 +13,14 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
int stride) {
const uint8_t *dst = dest;
const int16x4_t cospis = vld1_s16(kCospi);
- uint32x2_t dest01_u32 = vdup_n_u32(0);
+ uint8x8_t dest01_u8;
uint32x2_t dest32_u32 = vdup_n_u32(0);
int16x8_t a0, a1;
uint8x8_t d01, d32;
@@ -39,25 +40,22 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
a0 = vrshrq_n_s16(a0, 4);
a1 = vrshrq_n_s16(a1, 4);
- dest01_u32 = vld1_lane_u32((const uint32_t *)dst, dest01_u32, 0);
- dst += stride;
- dest01_u32 = vld1_lane_u32((const uint32_t *)dst, dest01_u32, 1);
- dst += stride;
+ dest01_u8 = load_u8(dst, stride);
+ dst += 2 * stride;
+ // The elements are loaded in reverse order.
dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 1);
dst += stride;
dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 0);
- d01_u16 =
- vaddw_u8(vreinterpretq_u16_s16(a0), vreinterpret_u8_u32(dest01_u32));
+ d01_u16 = vaddw_u8(vreinterpretq_u16_s16(a0), dest01_u8);
d32_u16 =
vaddw_u8(vreinterpretq_u16_s16(a1), vreinterpret_u8_u32(dest32_u32));
d01 = vqmovun_s16(vreinterpretq_s16_u16(d01_u16));
d32 = vqmovun_s16(vreinterpretq_s16_u16(d32_u16));
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d01), 0);
- dest += stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d01), 1);
- dest += stride;
+ store_u8(dest, stride, d01);
+ dest += 2 * stride;
+ // The elements are stored in reverse order.
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 1);
dest += stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 0);