summaryrefslogtreecommitdiff
path: root/libcpp
diff options
context:
space:
mode:
authorrearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4>2016-11-08 13:29:32 +0000
committerrearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4>2016-11-08 13:29:32 +0000
commitfe7d706928e031e2eb2f70593bb1a163619ab4e9 (patch)
treef5882c83842519c0bd1810e726a53985fa319331 /libcpp
parent3d6071e935dba106c0150ed9fcf80815ac620db4 (diff)
downloadgcc-fe7d706928e031e2eb2f70593bb1a163619ab4e9.tar.gz
[AArch64] Optimized implementation of search_line_fast for the CPP lexer
* lex.c (search_line_fast): New implementation for AArch64. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@241964 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp')
-rw-r--r--libcpp/ChangeLog4
-rw-r--r--libcpp/lex.c95
2 files changed, 99 insertions, 0 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 9083bdab980..a298a2fcef6 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,7 @@
+2016-11-08 Richard Earnshaw <rearnsha@arm.com>
+
+ * lex.c (search_line_fast): New implementation for AArch64.
+
2016-10-25 David Malcolm <dmalcolm@redhat.com>
* files.c (destroy_cpp_file): Free file->path.
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 6f65fa152fc..cea88488f36 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -752,6 +752,101 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
}
}
+#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
+#include "arm_neon.h"
+
+/* This doesn't have to be the exact page size, but no system may use
+ a size smaller than this. ARMv8 requires a minimum page size of
+ 4k. The impact of being conservative here is a small number of
+ cases will take the slightly slower entry path into the main
+ loop. */
+
+#define AARCH64_MIN_PAGE_SIZE 4096
+
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{
+ const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
+ const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
+ const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
+ const uint8x16_t repl_qm = vdupq_n_u8 ('?');
+ const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
+
+#ifdef __AARCH64EB
+ const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
+#else
+ const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
+#endif
+
+ unsigned int found;
+ const uint8_t *p;
+ uint8x16_t data;
+ uint8x16_t t;
+ uint16x8_t m;
+ uint8x16_t u, v, w;
+
+ /* Align the source pointer. */
+ p = (const uint8_t *)((uintptr_t)s & -16);
+
+ /* Assuming random string start positions, with a 4k page size we'll take
+ the slow path about 0.37% of the time. */
+ if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
+ - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
+ < 16, 0))
+ {
+ /* Slow path: the string starts near a possible page boundary. */
+ uint32_t misalign, mask;
+
+ misalign = (uintptr_t)s & 15;
+ mask = (-1u << misalign) & 0xffff;
+ data = vld1q_u8 (p);
+ t = vceqq_u8 (data, repl_nl);
+ u = vceqq_u8 (data, repl_cr);
+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+ t = vorrq_u8 (v, w);
+ t = vandq_u8 (t, xmask);
+ m = vpaddlq_u8 (t);
+ m = vshlq_u16 (m, shift);
+ found = vaddvq_u16 (m);
+ found &= mask;
+ if (found)
+ return (const uchar*)p + __builtin_ctz (found);
+ }
+ else
+ {
+ data = vld1q_u8 ((const uint8_t *) s);
+ t = vceqq_u8 (data, repl_nl);
+ u = vceqq_u8 (data, repl_cr);
+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+ t = vorrq_u8 (v, w);
+ if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t), 0))
+ goto done;
+ }
+
+ do
+ {
+ p += 16;
+ data = vld1q_u8 (p);
+ t = vceqq_u8 (data, repl_nl);
+ u = vceqq_u8 (data, repl_cr);
+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
+ t = vorrq_u8 (v, w);
+ } while (!vpaddd_u64 ((uint64x2_t)t));
+
+done:
+ /* Now that we've found the terminating substring, work out precisely where
+ we need to stop. */
+ t = vandq_u8 (t, xmask);
+ m = vpaddlq_u8 (t);
+ m = vshlq_u16 (m, shift);
+ found = vaddvq_u16 (m);
+ return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
+ + __builtin_ctz (found));
+}
+
#elif defined (__ARM_NEON)
#include "arm_neon.h"