diff options
author | Janne Grunau <j@jannau.net> | 2014-09-04 10:47:10 +0200 |
---|---|---|
committer | Janne Grunau <j@jannau.net> | 2014-10-09 23:25:36 +0200 |
commit | 3a1be40ea87ecc81e737aee6819ff96a6721f011 (patch) | |
tree | 6a74429521e2fca569f3f56913567470600dd33b | |
parent | 36e75c3efec08b1e9bdb9c1f69a5b0018abd8ac7 (diff) | |
download | gf-complete-3a1be40ea87ecc81e737aee6819ff96a6721f011.tar.gz |
arm: NEON optimisations for XOR in gf_multby_one
-rw-r--r-- | src/gf.c | 35 |
1 files changed, 35 insertions, 0 deletions
```diff
@@ -954,7 +954,42 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
   }
   return;
 #endif
+#if defined(ARM_NEON)
+  s8 = (uint8_t *) src;
+  d8 = (uint8_t *) dest;
+  if (uls % 16 == uld % 16) {
+    gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16);
+    while (s8 != rd.s_start) {
+      *d8 ^= *s8;
+      s8++;
+      d8++;
+    }
+    while (s8 < (uint8_t *) rd.s_top) {
+      uint8x16_t vs = vld1q_u8 (s8);
+      uint8x16_t vd = vld1q_u8 (d8);
+      uint8x16_t vr = veorq_u8 (vs, vd);
+      vst1q_u8 (d8, vr);
+      s8 += 16;
+      d8 += 16;
+    }
+  } else {
+    while (s8 + 15 < (uint8_t *) src + bytes) {
+      uint8x16_t vs = vld1q_u8 (s8);
+      uint8x16_t vd = vld1q_u8 (d8);
+      uint8x16_t vr = veorq_u8 (vs, vd);
+      vst1q_u8 (d8, vr);
+      s8 += 16;
+      d8 += 16;
+    }
+  }
+  while (s8 < (uint8_t *) src + bytes) {
+    *d8 ^= *s8;
+    s8++;
+    d8++;
+  }
+  return;
+#endif
   if (uls % 8 != uld % 8) {
     gf_unaligned_xor(src, dest, bytes);
     return;
```