summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBassam Tabbara <bassam@symform.com>2016-09-06 23:48:39 -0700
committerBassam Tabbara <bassam@symform.com>2016-09-13 12:25:00 -0700
commitad11042132c7db78e8ae57a364c37df74572e8b6 (patch)
tree351db6b0ab5b08fc0ce0c4421fde5de26c0f3337
parent4339569f14c95a8895a347845f8ed6e18b345ace (diff)
downloadgf-complete-ad11042132c7db78e8ae57a364c37df74572e8b6.tar.gz
Simplify SIMD make scripts
ax_ext.m4 no longer performs any CPU checks. Instead it just checks whether the compiler supports the SIMD flags. Runtime detection will choose the right methods based on the CPU instructions available. Intel AVX support is still selected at build time, since supporting it at runtime would require a major refactoring of the code base. For now I added a configuration flag --enable-avx that can be used to compile with AVX support. Also use CPU intrinsics instead of __asm__.
-rw-r--r--configure.ac9
-rw-r--r--m4/ax_ext.m4295
-rw-r--r--m4/ax_gcc_x86_avx_xgetbv.m479
-rw-r--r--m4/ax_gcc_x86_cpuid.m479
-rw-r--r--src/gf_cpu.c33
-rw-r--r--test/Makefile.am2
-rw-r--r--tools/Makefile.am2
-rwxr-xr-xtools/test_simd.sh12
-rwxr-xr-xtools/test_simd_qemu.sh2
9 files changed, 79 insertions, 434 deletions
diff --git a/configure.ac b/configure.ac
index 3e8cf18..3beea03 100644
--- a/configure.ac
+++ b/configure.ac
@@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind],
[enable_valgrind=no])
AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno)
+AC_ARG_ENABLE([avx], [AS_HELP_STRING([--enable-avx], [Build with AVX optimizations])])
+AX_CHECK_COMPILE_FLAG([-mavx], [ax_cv_support_avx=yes], [ax_cv_support_avx=no])
+# ax_cv_support_avx is set to no on failure so that the --enable-avx check below can reject it.
+AS_IF([test "x$enable_avx" = "xyes"],
+ [AS_IF([test "x$ax_cv_support_avx" = "xno"],
+ [AC_MSG_ERROR([AVX requested but compiler does not support -mavx])],
+ [SIMD_FLAGS="$SIMD_FLAGS -mavx"])
+ ])
+
AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile])
AC_OUTPUT
diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4
index c03ccef..95c4dbe 100644
--- a/m4/ax_ext.m4
+++ b/m4/ax_ext.m4
@@ -1,40 +1,7 @@
#
-# Updated by KMG to support -DINTEL_SSE for GF-Complete
+# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html
+# but simplified to do compile time SIMD checks only
#
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_ext.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_EXT
-#
-# DESCRIPTION
-#
-# Find supported SIMD extensions by requesting cpuid. When an SIMD
-# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
-# compiler supports it. For example, if "sse2" is available, then "-msse2"
-# is added to SIMD_FLAGS.
-#
-# This macro calls:
-#
-# AC_SUBST(SIMD_FLAGS)
-#
-# And defines:
-#
-# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
-#
-# LICENSE
-#
-# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
-# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
-#
-# Copying and distribution of this file, with or without modification, are
-# permitted in any medium without royalty provided the copyright notice
-# and this notice are preserved. This file is offered as-is, without any
-# warranty.
-
-#serial 12
AC_DEFUN([AX_EXT],
[
@@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT],
AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
- AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_neon_ext=yes
- ])
- AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_arm_crypt_ext=yes
- ])
-
- if test "$ax_cv_have_arm_crypt_ext" = yes; then
- AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
- fi
-
+ AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
- AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
-
- if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
- elif test "$ax_cv_have_arm_crypt_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
- elif test "$ax_cv_have_neon_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
- fi
- ;;
+ ;;
arm*)
- AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_neon_ext=yes
- ])
-
+ AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
- AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
- AX_CHECK_COMPILE_FLAG(-mfpu=neon,
- SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
+ AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
- ;;
+ ;;
powerpc*)
- AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
- [
- if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
- if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
- ax_cv_have_altivec_ext=yes
- fi
- fi
- ])
-
- if test "$ax_cv_have_altivec_ext" = yes; then
- AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
- AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
- fi
- ;;
-
-
- i[[3456]]86*|x86_64*|amd64*)
-
- AC_REQUIRE([AX_GCC_X86_CPUID])
- AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
-
- AX_GCC_X86_CPUID(0x00000001)
- ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
- edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
-
- AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
- [
- ax_cv_have_mmx_ext=no
- if test "$((0x$edx>>23&0x01))" = 1; then
- ax_cv_have_mmx_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
- [
- ax_cv_have_sse_ext=no
- if test "$((0x$edx>>25&0x01))" = 1; then
- ax_cv_have_sse_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
- [
- ax_cv_have_sse2_ext=no
- if test "$((0x$edx>>26&0x01))" = 1; then
- ax_cv_have_sse2_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
- [
- ax_cv_have_sse3_ext=no
- if test "$((0x$ecx&0x01))" = 1; then
- ax_cv_have_sse3_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],
- [
- ax_cv_have_pclmuldq_ext=no
- if test "$((0x$ecx>>1&0x01))" = 1; then
- ax_cv_have_pclmuldq_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
- [
- ax_cv_have_ssse3_ext=no
- if test "$((0x$ecx>>9&0x01))" = 1; then
- ax_cv_have_ssse3_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
- [
- ax_cv_have_sse41_ext=no
- if test "$((0x$ecx>>19&0x01))" = 1; then
- ax_cv_have_sse41_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
- [
- ax_cv_have_sse42_ext=no
- if test "$((0x$ecx>>20&0x01))" = 1; then
- ax_cv_have_sse42_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
- [
- ax_cv_have_avx_cpu_ext=no
- if test "$((0x$ecx>>28&0x01))" = 1; then
- ax_cv_have_avx_cpu_ext=yes
- fi
- ])
-
- if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
- AX_GCC_X86_AVX_XGETBV(0x00000000)
-
- xgetbv_eax="0"
- if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
- xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
- fi
-
- AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
- [
- ax_cv_have_avx_ext=no
-
- if test "$((0x$ecx>>27&0x01))" = 1; then
- if test "$((0x$xgetbv_eax&0x6))" = 6; then
- ax_cv_have_avx_ext=yes
- fi
- fi
- ])
- if test x"$ax_cv_have_avx_ext" = x"no"; then
- AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
- fi
+ AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes])
+ if test "$ax_cv_have_altivec_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no])
fi
+ ;;
- if test "$ax_cv_have_mmx_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
- if test x"$ax_cv_support_mmx_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mmmx"
- AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
- else
- AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
- fi
- fi
+ i[[3456]]86*|x86_64*|amd64*)
+ AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes])
if test "$ax_cv_have_sse_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
- if test x"$ax_cv_support_sse_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"
- AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
- else
- AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no])
fi
+ AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes])
if test "$ax_cv_have_sse2_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
- if test x"$ax_cv_support_sse2_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"
- AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
- else
- AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no])
fi
+ AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes])
if test "$ax_cv_have_sse3_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
- if test x"$ax_cv_support_sse3_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"
- AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
- else
- AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
- fi
- fi
-
- if test "$ax_cv_have_pclmuldq_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])
- if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"
- AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication])
- else
- AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no])
fi
+ AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes])
if test "$ax_cv_have_ssse3_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
- if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"
- AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
- else
- AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no])
fi
- if test "$ax_cv_have_sse41_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
- if test x"$ax_cv_support_sse41_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"
- AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
- else
- AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes])
+ if test "$ax_cv_have_pclmuldq_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no])
fi
- if test "$ax_cv_have_sse42_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
- if test x"$ax_cv_support_sse42_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"
- AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
- else
- AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes])
+ if test "$ax_cv_have_sse41_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no])
fi
- if test "$ax_cv_have_avx_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
- if test x"$ax_cv_support_avx_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mavx"
- AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
- else
- AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes])
+ if test "$ax_cv_have_sse42_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no])
fi
-
- ;;
+ ;;
esac
AC_SUBST(SIMD_FLAGS)
diff --git a/m4/ax_gcc_x86_avx_xgetbv.m4 b/m4/ax_gcc_x86_avx_xgetbv.m4
deleted file mode 100644
index 0624eeb..0000000
--- a/m4/ax_gcc_x86_avx_xgetbv.m4
+++ /dev/null
@@ -1,79 +0,0 @@
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_GCC_X86_AVX_XGETBV
-#
-# DESCRIPTION
-#
-# On later x86 processors with AVX SIMD support, with gcc or a compiler
-# that has a compatible syntax for inline assembly instructions, run a
-# small program that executes the xgetbv instruction with input OP. This
-# can be used to detect if the OS supports AVX instruction usage.
-#
-# On output, the values of the eax and edx registers are stored as
-# hexadecimal strings as "eax:edx" in the cache variable
-# ax_cv_gcc_x86_avx_xgetbv.
-#
-# If the xgetbv instruction fails (because you are running a
-# cross-compiler, or because you are not using gcc, or because you are on
-# a processor that doesn't have this instruction),
-# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
-#
-# This macro mainly exists to be used in AX_EXT.
-#
-# LICENSE
-#
-# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-# Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# As a special exception, the respective Autoconf Macro's copyright owner
-# gives unlimited permission to copy, distribute and modify the configure
-# scripts that are the output of Autoconf when processing the Macro. You
-# need not follow the terms of the GNU General Public License when using
-# or distributing such scripts, even though portions of the text of the
-# Macro appear in them. The GNU General Public License (GPL) does govern
-# all other use of the material that constitutes the Autoconf Macro.
-#
-# This special exception to the GPL applies to versions of the Autoconf
-# Macro released by the Autoconf Archive. When you make and distribute a
-# modified version of the Autoconf Macro, you may extend this special
-# exception to the GPL to apply to your modified version as well.
-
-#serial 1
-
-AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
-[AC_REQUIRE([AC_PROG_CC])
-AC_LANG_PUSH([C])
-AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
- [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
- int op = $1, eax, edx;
- FILE *f;
- /* Opcodes for xgetbv */
- __asm__(".byte 0x0f, 0x01, 0xd0"
- : "=a" (eax), "=d" (edx)
- : "c" (op));
- f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
- fprintf(f, "%x:%x\n", eax, edx);
- fclose(f);
- return 0;
-])],
- [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
- [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
- [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
-AC_LANG_POP([C])
-])
diff --git a/m4/ax_gcc_x86_cpuid.m4 b/m4/ax_gcc_x86_cpuid.m4
deleted file mode 100644
index 7d46fee..0000000
--- a/m4/ax_gcc_x86_cpuid.m4
+++ /dev/null
@@ -1,79 +0,0 @@
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_GCC_X86_CPUID(OP)
-#
-# DESCRIPTION
-#
-# On Pentium and later x86 processors, with gcc or a compiler that has a
-# compatible syntax for inline assembly instructions, run a small program
-# that executes the cpuid instruction with input OP. This can be used to
-# detect the CPU type.
-#
-# On output, the values of the eax, ebx, ecx, and edx registers are stored
-# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
-# ax_cv_gcc_x86_cpuid_OP.
-#
-# If the cpuid instruction fails (because you are running a
-# cross-compiler, or because you are not using gcc, or because you are on
-# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
-# is set to the string "unknown".
-#
-# This macro mainly exists to be used in AX_GCC_ARCHFLAG.
-#
-# LICENSE
-#
-# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
-# Copyright (c) 2008 Matteo Frigo
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-# Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# As a special exception, the respective Autoconf Macro's copyright owner
-# gives unlimited permission to copy, distribute and modify the configure
-# scripts that are the output of Autoconf when processing the Macro. You
-# need not follow the terms of the GNU General Public License when using
-# or distributing such scripts, even though portions of the text of the
-# Macro appear in them. The GNU General Public License (GPL) does govern
-# all other use of the material that constitutes the Autoconf Macro.
-#
-# This special exception to the GPL applies to versions of the Autoconf
-# Macro released by the Autoconf Archive. When you make and distribute a
-# modified version of the Autoconf Macro, you may extend this special
-# exception to the GPL to apply to your modified version as well.
-
-#serial 7
-
-AC_DEFUN([AX_GCC_X86_CPUID],
-[AC_REQUIRE([AC_PROG_CC])
-AC_LANG_PUSH([C])
-AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
- [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
- int op = $1, eax, ebx, ecx, edx;
- FILE *f;
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "a" (op));
- f = fopen("conftest_cpuid", "w"); if (!f) return 1;
- fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
- fclose(f);
- return 0;
-])],
- [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
- [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
- [ax_cv_gcc_x86_cpuid_$1=unknown])])
-AC_LANG_POP([C])
-])
diff --git a/src/gf_cpu.c b/src/gf_cpu.c
index ee2f847..fae2cd5 100644
--- a/src/gf_cpu.c
+++ b/src/gf_cpu.c
@@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0;
#if defined(__x86_64__)
+#if defined(_MSC_VER)
+#include <intrin.h> /* declares __cpuidex */
+#define cpuid(info, x) __cpuidex(info, x, 0)
+
+#elif defined(__GNUC__)
+/* Run CPUID leaf InfoType, sub-leaf 0; info[0..3] receive EAX, EBX, ECX, EDX. */
+#include <cpuid.h>
+void cpuid(int info[4], int InfoType){
+ __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
+}
+
+#else
+
+#error please add a way to detect CPU SIMD support at runtime
+
+#endif
+
void gf_cpu_identify(void)
{
if (gf_cpu_identified) {
return;
}
- int op = 1, eax, ebx, ecx, edx;
+ int reg[4];
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "a" (op));
+ cpuid(reg, 1);
#if defined(INTEL_SSE4_PCLMUL)
- if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
+ if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
gf_cpu_supports_intel_pclmul = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_pclmul\n");
@@ -44,7 +59,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE4)
- if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
+ if (((reg[2] & (1<<20)) != 0 || (reg[2] & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
gf_cpu_supports_intel_sse4 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse4\n");
@@ -53,7 +68,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSSE3)
- if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
+ if ((reg[2] & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
gf_cpu_supports_intel_ssse3 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_ssse3\n");
@@ -62,7 +77,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE3)
- if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
+ if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
gf_cpu_supports_intel_sse3 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse3\n");
@@ -71,7 +86,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE2)
- if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
+ if ((reg[3] & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
gf_cpu_supports_intel_sse2 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse2\n");
diff --git a/test/Makefile.am b/test/Makefile.am
index 2791528..f590ecc 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1,7 +1,7 @@
# GF-Complete 'test' AM file
AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
-AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
+AM_CFLAGS = -O3 -fPIC
bin_PROGRAMS = gf_unit
diff --git a/tools/Makefile.am b/tools/Makefile.am
index a9dd8b9..4ca9131 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -1,7 +1,7 @@
# GF-Complete 'tools' AM file
AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
-AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
+AM_CFLAGS = -O3 -fPIC
bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time
diff --git a/tools/test_simd.sh b/tools/test_simd.sh
index 1b0e319..6401590 100755
--- a/tools/test_simd.sh
+++ b/tools/test_simd.sh
@@ -27,6 +27,16 @@ test_functions() {
return ${failed}
}
+# Build with DEBUG_CPU_DETECTION and record the CPU detection output in ${results}
+test_detection() {
+ failed=0
+ # Rebuild from scratch so the DEBUG_CPU_DETECTION printf lines are compiled in.
+ { ./configure && make clean && make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> ${results}; return 1; }
+ { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods 32 FAILED" >> ${results}; ((++failed)); }
+ # Non-zero when grep found no '#' detection lines in the gf_methods output.
+ return ${failed}
+}
+
compile_arm() {
failed=0
@@ -167,7 +177,7 @@ runtime_intel_flags() {
{ ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); }
done
- echo "====SSE2 support..." >> ${1}
+ echo "====SSE2 support..." >> ${1}
export ax_cv_have_sse_ext=no
export ax_cv_have_sse2_ext=yes
export ax_cv_have_sse3_ext=no
diff --git a/tools/test_simd_qemu.sh b/tools/test_simd_qemu.sh
index 7b2cb1c..5771874 100755
--- a/tools/test_simd_qemu.sh
+++ b/tools/test_simd_qemu.sh
@@ -224,6 +224,8 @@ run_test_simd_basic() {
{ run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
echo "=====running functions test"
{ run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
+ echo "=====running detection test"
+ { run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
echo "=====running runtime test"
{ run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
stop_qemu