summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Janne Grunau <j@jannau.net> 2014-09-04 10:47:10 +0200
committer Janne Grunau <j@jannau.net> 2014-10-09 23:25:36 +0200
commit 3a1be40ea87ecc81e737aee6819ff96a6721f011 (patch)
tree 6a74429521e2fca569f3f56913567470600dd33b
parent 36e75c3efec08b1e9bdb9c1f69a5b0018abd8ac7 (diff)
download gf-complete-3a1be40ea87ecc81e737aee6819ff96a6721f011.tar.gz
arm: NEON optimisations for XOR in gf_multby_one
-rw-r--r-- src/gf.c 35
1 files changed, 35 insertions, 0 deletions
diff --git a/src/gf.c b/src/gf.c
index ca6a7f8..c3801e7 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -954,7 +954,42 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
}
return;
#endif
+#if defined(ARM_NEON)
+ s8 = (uint8_t *) src;
+ d8 = (uint8_t *) dest;
+ if (uls % 16 == uld % 16) {
+ gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16);
+ while (s8 != rd.s_start) {
+ *d8 ^= *s8;
+ s8++;
+ d8++;
+ }
+ while (s8 < (uint8_t *) rd.s_top) {
+ uint8x16_t vs = vld1q_u8 (s8);
+ uint8x16_t vd = vld1q_u8 (d8);
+ uint8x16_t vr = veorq_u8 (vs, vd);
+ vst1q_u8 (d8, vr);
+ s8 += 16;
+ d8 += 16;
+ }
+ } else {
+ while (s8 + 15 < (uint8_t *) src + bytes) {
+ uint8x16_t vs = vld1q_u8 (s8);
+ uint8x16_t vd = vld1q_u8 (d8);
+ uint8x16_t vr = veorq_u8 (vs, vd);
+ vst1q_u8 (d8, vr);
+ s8 += 16;
+ d8 += 16;
+ }
+ }
+ while (s8 < (uint8_t *) src + bytes) {
+ *d8 ^= *s8;
+ s8++;
+ d8++;
+ }
+ return;
+#endif
if (uls % 8 != uld % 8) {
gf_unaligned_xor(src, dest, bytes);
return;