summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Janne Grunau <j@jannau.net> 2014-09-04 18:29:58 +0200
committer Janne Grunau <j@jannau.net> 2014-10-09 23:22:33 +0200
commit eb5ce0ca4206ed4f74009c1b9a3a72407693448b (patch)
tree 755a68f435fc53f93a015e8a695eab5422905b35
parent 568df90edc6ae07744de45de8665fb86ce6c84ee (diff)
download gf-complete-eb5ce0ca4206ed4f74009c1b9a3a72407693448b.tar.gz
configure: add ARM/AArch64 NEON support
Checks for arm_neon.h header.
-rw-r--r-- configure.ac 21
-rw-r--r-- include/gf_complete.h 4
-rw-r--r-- m4/ax_ext.m4 49
3 files changed, 74 insertions, 0 deletions
diff --git a/configure.ac b/configure.ac
index 47d5d62..31ab1fa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -24,6 +24,27 @@ AC_PROG_CC
AX_EXT()
+AC_ARG_ENABLE([neon],
+ AS_HELP_STRING([--disable-neon], [Build without NEON optimizations]))
+# Unless --disable-neon was given, probe for arm_neon.h with SIMD_FLAGS added to CPPFLAGS; CPPFLAGS is restored only when the header is missing. With --disable-neon, SIMD_FLAGS is emptied if ax_cv_have_neon_ext is yes.
+AS_IF([test "x$enable_neon" != "xno"],
+ [noneon_CPPFLAGS=$CPPFLAGS
+ CPPFLAGS="$CPPFLAGS $SIMD_FLAGS"
+ AC_CHECK_HEADER([arm_neon.h],
+ [have_neon=yes],
+ [have_neon=no
+ CPPFLAGS=$noneon_CPPFLAGS])],
+ [have_neon=no
+ AS_IF([test "x$ax_cv_have_neon_ext" = "xyes"],
+ [SIMD_FLAGS=""])
+ ])
+# A missing header is fatal only when NEON was explicitly requested with --enable-neon; otherwise the build silently proceeds without it.
+AS_IF([test "x$have_neon" = "xno"],
+ [AS_IF([test "x$enable_neon" = "xyes"],
+ [AC_MSG_ERROR([neon requested but arm_neon.h not found])])
+ ])
+AM_CONDITIONAL([HAVE_NEON], [test "x$have_neon" = "xyes"])
+
AC_ARG_ENABLE([sse],
AS_HELP_STRING([--disable-sse], [Build without SSE optimizations]),
[if test "x$enableval" = "xno" ; then
diff --git a/include/gf_complete.h b/include/gf_complete.h
index e8ea2ca..c4783e8 100644
--- a/include/gf_complete.h
+++ b/include/gf_complete.h
@@ -33,6 +33,10 @@
#include <wmmintrin.h>
#endif
+#if defined(ARM_NEON) /* ARM_NEON comes from -DARM_NEON, which m4/ax_ext.m4 appends to SIMD_FLAGS */
+ #include <arm_neon.h>
+#endif /* ARM_NEON */
+
/* These are the different ways to perform multiplication.
Not all are implemented for all values of w.
diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4
index cfbb797..c03ccef 100644
--- a/m4/ax_ext.m4
+++ b/m4/ax_ext.m4
@@ -41,6 +41,55 @@ AC_DEFUN([AX_EXT],
AC_REQUIRE([AC_CANONICAL_HOST])
case $host_cpu in
+ aarch64*)
+ AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
+ SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
+ # No real probing yet: unless a cached value exists, both checks below simply answer yes.
+ AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
+ [
+ # TODO: detect NEON for real and handle cross-compilation - assumed present for now
+ ax_cv_have_neon_ext=yes
+ ])
+ AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
+ [
+ # TODO: detect the crypto extension for real and handle cross-compilation - assumed present for now
+ ax_cv_have_arm_crypt_ext=yes
+ ])
+ # Emit the preprocessor define for each extension considered available.
+ if test "$ax_cv_have_arm_crypt_ext" = yes; then
+ AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
+ fi
+
+ if test "$ax_cv_have_neon_ext" = yes; then
+ AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+ fi
+ # Pick the -march variant matching the available extensions; it and the -DARM_* defines are added to SIMD_FLAGS only if the compiler accepts the flag.
+ if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
+ SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
+ elif test "$ax_cv_have_arm_crypt_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
+ SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
+ elif test "$ax_cv_have_neon_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
+ SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
+ fi
+ ;;
+
+ arm*)
+ AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
+ [
+ # TODO: detect NEON for real and handle cross-compilation - assumed present for now
+ ax_cv_have_neon_ext=yes
+ ])
+ # With NEON assumed available, define HAVE_NEON and add -mfpu=neon -DARM_NEON to SIMD_FLAGS only if the compiler accepts -mfpu=neon.
+ if test "$ax_cv_have_neon_ext" = yes; then
+ AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+ AX_CHECK_COMPILE_FLAG(-mfpu=neon,
+ SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
+ fi
+ ;;
+
powerpc*)
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
[