summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configure.ac9
-rw-r--r--m4/ax_ext.m4295
-rw-r--r--m4/ax_gcc_x86_avx_xgetbv.m479
-rw-r--r--m4/ax_gcc_x86_cpuid.m479
-rw-r--r--src/gf_cpu.c33
-rw-r--r--test/Makefile.am2
-rw-r--r--tools/Makefile.am2
-rwxr-xr-xtools/test_simd.sh12
-rwxr-xr-xtools/test_simd_qemu.sh2
9 files changed, 79 insertions, 434 deletions
diff --git a/configure.ac b/configure.ac
index 3e8cf18..3beea03 100644
--- a/configure.ac
+++ b/configure.ac
@@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind],
[enable_valgrind=no])
AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno)
+AC_ARG_ENABLE([avx], AS_HELP_STRING([--enable-avx], [Build with AVX optimizations]))
+AX_CHECK_COMPILE_FLAG(-mavx, [ax_cv_support_avx=yes], [])
+
+AS_IF([test "x$enable_avx" = "xyes"],
+ [AS_IF([test "x$ax_cv_support_avx" = "xno"],
+ [AC_MSG_ERROR([AVX requested but compiler does not support -mavx])],
+ [SIMD_FLAGS="$SIMD_FLAGS -mavx"])
+ ])
+
AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile])
AC_OUTPUT
diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4
index c03ccef..95c4dbe 100644
--- a/m4/ax_ext.m4
+++ b/m4/ax_ext.m4
@@ -1,40 +1,7 @@
#
-# Updated by KMG to support -DINTEL_SSE for GF-Complete
+# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html
+# but simplified to do compile time SIMD checks only
#
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_ext.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_EXT
-#
-# DESCRIPTION
-#
-# Find supported SIMD extensions by requesting cpuid. When an SIMD
-# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
-# compiler supports it. For example, if "sse2" is available, then "-msse2"
-# is added to SIMD_FLAGS.
-#
-# This macro calls:
-#
-# AC_SUBST(SIMD_FLAGS)
-#
-# And defines:
-#
-# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
-#
-# LICENSE
-#
-# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
-# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
-#
-# Copying and distribution of this file, with or without modification, are
-# permitted in any medium without royalty provided the copyright notice
-# and this notice are preserved. This file is offered as-is, without any
-# warranty.
-
-#serial 12
AC_DEFUN([AX_EXT],
[
@@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT],
AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
- AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_neon_ext=yes
- ])
- AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_arm_crypt_ext=yes
- ])
-
- if test "$ax_cv_have_arm_crypt_ext" = yes; then
- AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
- fi
-
+ AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
- AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
-
- if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
- elif test "$ax_cv_have_arm_crypt_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
- elif test "$ax_cv_have_neon_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
- SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
- fi
- ;;
+ ;;
arm*)
- AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
- [
- # TODO: detect / cross-compile
- ax_cv_have_neon_ext=yes
- ])
-
+ AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
- AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
- AX_CHECK_COMPILE_FLAG(-mfpu=neon,
- SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
+ AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
- ;;
+ ;;
powerpc*)
- AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
- [
- if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
- if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
- ax_cv_have_altivec_ext=yes
- fi
- fi
- ])
-
- if test "$ax_cv_have_altivec_ext" = yes; then
- AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
- AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
- fi
- ;;
-
-
- i[[3456]]86*|x86_64*|amd64*)
-
- AC_REQUIRE([AX_GCC_X86_CPUID])
- AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
-
- AX_GCC_X86_CPUID(0x00000001)
- ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
- edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
-
- AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
- [
- ax_cv_have_mmx_ext=no
- if test "$((0x$edx>>23&0x01))" = 1; then
- ax_cv_have_mmx_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
- [
- ax_cv_have_sse_ext=no
- if test "$((0x$edx>>25&0x01))" = 1; then
- ax_cv_have_sse_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
- [
- ax_cv_have_sse2_ext=no
- if test "$((0x$edx>>26&0x01))" = 1; then
- ax_cv_have_sse2_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
- [
- ax_cv_have_sse3_ext=no
- if test "$((0x$ecx&0x01))" = 1; then
- ax_cv_have_sse3_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],
- [
- ax_cv_have_pclmuldq_ext=no
- if test "$((0x$ecx>>1&0x01))" = 1; then
- ax_cv_have_pclmuldq_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
- [
- ax_cv_have_ssse3_ext=no
- if test "$((0x$ecx>>9&0x01))" = 1; then
- ax_cv_have_ssse3_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
- [
- ax_cv_have_sse41_ext=no
- if test "$((0x$ecx>>19&0x01))" = 1; then
- ax_cv_have_sse41_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
- [
- ax_cv_have_sse42_ext=no
- if test "$((0x$ecx>>20&0x01))" = 1; then
- ax_cv_have_sse42_ext=yes
- fi
- ])
-
- AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
- [
- ax_cv_have_avx_cpu_ext=no
- if test "$((0x$ecx>>28&0x01))" = 1; then
- ax_cv_have_avx_cpu_ext=yes
- fi
- ])
-
- if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
- AX_GCC_X86_AVX_XGETBV(0x00000000)
-
- xgetbv_eax="0"
- if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
- xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
- fi
-
- AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
- [
- ax_cv_have_avx_ext=no
-
- if test "$((0x$ecx>>27&0x01))" = 1; then
- if test "$((0x$xgetbv_eax&0x6))" = 6; then
- ax_cv_have_avx_ext=yes
- fi
- fi
- ])
- if test x"$ax_cv_have_avx_ext" = x"no"; then
- AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
- fi
+ AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes])
+ if test "$ax_cv_have_altivec_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no])
fi
+ ;;
- if test "$ax_cv_have_mmx_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
- if test x"$ax_cv_support_mmx_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mmmx"
- AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
- else
- AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
- fi
- fi
+ i[[3456]]86*|x86_64*|amd64*)
+ AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes])
if test "$ax_cv_have_sse_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
- if test x"$ax_cv_support_sse_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"
- AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
- else
- AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no])
fi
+ AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes])
if test "$ax_cv_have_sse2_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
- if test x"$ax_cv_support_sse2_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"
- AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
- else
- AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no])
fi
+ AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes])
if test "$ax_cv_have_sse3_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
- if test x"$ax_cv_support_sse3_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"
- AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
- else
- AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
- fi
- fi
-
- if test "$ax_cv_have_pclmuldq_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])
- if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"
- AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication])
- else
- AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no])
fi
+ AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes])
if test "$ax_cv_have_ssse3_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
- if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"
- AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
- else
- AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
- fi
+ AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no])
fi
- if test "$ax_cv_have_sse41_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
- if test x"$ax_cv_support_sse41_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"
- AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
- else
- AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes])
+ if test "$ax_cv_have_pclmuldq_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no])
fi
- if test "$ax_cv_have_sse42_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
- if test x"$ax_cv_support_sse42_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"
- AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
- else
- AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes])
+ if test "$ax_cv_have_sse41_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no])
fi
- if test "$ax_cv_have_avx_ext" = yes; then
- AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
- if test x"$ax_cv_support_avx_ext" = x"yes"; then
- SIMD_FLAGS="$SIMD_FLAGS -mavx"
- AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
- else
- AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
- fi
+ AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes])
+ if test "$ax_cv_have_sse42_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no])
fi
-
- ;;
+ ;;
esac
AC_SUBST(SIMD_FLAGS)
diff --git a/m4/ax_gcc_x86_avx_xgetbv.m4 b/m4/ax_gcc_x86_avx_xgetbv.m4
deleted file mode 100644
index 0624eeb..0000000
--- a/m4/ax_gcc_x86_avx_xgetbv.m4
+++ /dev/null
@@ -1,79 +0,0 @@
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_GCC_X86_AVX_XGETBV
-#
-# DESCRIPTION
-#
-# On later x86 processors with AVX SIMD support, with gcc or a compiler
-# that has a compatible syntax for inline assembly instructions, run a
-# small program that executes the xgetbv instruction with input OP. This
-# can be used to detect if the OS supports AVX instruction usage.
-#
-# On output, the values of the eax and edx registers are stored as
-# hexadecimal strings as "eax:edx" in the cache variable
-# ax_cv_gcc_x86_avx_xgetbv.
-#
-# If the xgetbv instruction fails (because you are running a
-# cross-compiler, or because you are not using gcc, or because you are on
-# a processor that doesn't have this instruction),
-# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
-#
-# This macro mainly exists to be used in AX_EXT.
-#
-# LICENSE
-#
-# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-# Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# As a special exception, the respective Autoconf Macro's copyright owner
-# gives unlimited permission to copy, distribute and modify the configure
-# scripts that are the output of Autoconf when processing the Macro. You
-# need not follow the terms of the GNU General Public License when using
-# or distributing such scripts, even though portions of the text of the
-# Macro appear in them. The GNU General Public License (GPL) does govern
-# all other use of the material that constitutes the Autoconf Macro.
-#
-# This special exception to the GPL applies to versions of the Autoconf
-# Macro released by the Autoconf Archive. When you make and distribute a
-# modified version of the Autoconf Macro, you may extend this special
-# exception to the GPL to apply to your modified version as well.
-
-#serial 1
-
-AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
-[AC_REQUIRE([AC_PROG_CC])
-AC_LANG_PUSH([C])
-AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
- [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
- int op = $1, eax, edx;
- FILE *f;
- /* Opcodes for xgetbv */
- __asm__(".byte 0x0f, 0x01, 0xd0"
- : "=a" (eax), "=d" (edx)
- : "c" (op));
- f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
- fprintf(f, "%x:%x\n", eax, edx);
- fclose(f);
- return 0;
-])],
- [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
- [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
- [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
-AC_LANG_POP([C])
-])
diff --git a/m4/ax_gcc_x86_cpuid.m4 b/m4/ax_gcc_x86_cpuid.m4
deleted file mode 100644
index 7d46fee..0000000
--- a/m4/ax_gcc_x86_cpuid.m4
+++ /dev/null
@@ -1,79 +0,0 @@
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_GCC_X86_CPUID(OP)
-#
-# DESCRIPTION
-#
-# On Pentium and later x86 processors, with gcc or a compiler that has a
-# compatible syntax for inline assembly instructions, run a small program
-# that executes the cpuid instruction with input OP. This can be used to
-# detect the CPU type.
-#
-# On output, the values of the eax, ebx, ecx, and edx registers are stored
-# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
-# ax_cv_gcc_x86_cpuid_OP.
-#
-# If the cpuid instruction fails (because you are running a
-# cross-compiler, or because you are not using gcc, or because you are on
-# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
-# is set to the string "unknown".
-#
-# This macro mainly exists to be used in AX_GCC_ARCHFLAG.
-#
-# LICENSE
-#
-# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
-# Copyright (c) 2008 Matteo Frigo
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-# Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# As a special exception, the respective Autoconf Macro's copyright owner
-# gives unlimited permission to copy, distribute and modify the configure
-# scripts that are the output of Autoconf when processing the Macro. You
-# need not follow the terms of the GNU General Public License when using
-# or distributing such scripts, even though portions of the text of the
-# Macro appear in them. The GNU General Public License (GPL) does govern
-# all other use of the material that constitutes the Autoconf Macro.
-#
-# This special exception to the GPL applies to versions of the Autoconf
-# Macro released by the Autoconf Archive. When you make and distribute a
-# modified version of the Autoconf Macro, you may extend this special
-# exception to the GPL to apply to your modified version as well.
-
-#serial 7
-
-AC_DEFUN([AX_GCC_X86_CPUID],
-[AC_REQUIRE([AC_PROG_CC])
-AC_LANG_PUSH([C])
-AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
- [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
- int op = $1, eax, ebx, ecx, edx;
- FILE *f;
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "a" (op));
- f = fopen("conftest_cpuid", "w"); if (!f) return 1;
- fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
- fclose(f);
- return 0;
-])],
- [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
- [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
- [ax_cv_gcc_x86_cpuid_$1=unknown])])
-AC_LANG_POP([C])
-])
diff --git a/src/gf_cpu.c b/src/gf_cpu.c
index ee2f847..fae2cd5 100644
--- a/src/gf_cpu.c
+++ b/src/gf_cpu.c
@@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0;
#if defined(__x86_64__)
+#if defined(_MSC_VER)
+
+#define cpuid(info, x) __cpuidex(info, x, 0)
+
+#elif defined(__GNUC__)
+
+#include <cpuid.h>
+void cpuid(int info[4], int InfoType){
+ __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
+}
+
+#else
+
+#error please add a way to detect CPU SIMD support at runtime
+
+#endif
+
void gf_cpu_identify(void)
{
if (gf_cpu_identified) {
return;
}
- int op = 1, eax, ebx, ecx, edx;
+ int reg[4];
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "a" (op));
+ cpuid(reg, 1);
#if defined(INTEL_SSE4_PCLMUL)
- if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
+ if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
gf_cpu_supports_intel_pclmul = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_pclmul\n");
@@ -44,7 +59,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE4)
- if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
+ if (((reg[2] & (1<<20)) != 0 || (reg[2] & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
gf_cpu_supports_intel_sse4 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse4\n");
@@ -53,7 +68,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSSE3)
- if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
+ if ((reg[2] & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
gf_cpu_supports_intel_ssse3 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_ssse3\n");
@@ -62,7 +77,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE3)
- if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
+ if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
gf_cpu_supports_intel_sse3 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse3\n");
@@ -71,7 +86,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE2)
- if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
+ if ((reg[3] & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
gf_cpu_supports_intel_sse2 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse2\n");
diff --git a/test/Makefile.am b/test/Makefile.am
index 2791528..f590ecc 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1,7 +1,7 @@
# GF-Complete 'test' AM file
AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
-AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
+AM_CFLAGS = -O3 -fPIC
bin_PROGRAMS = gf_unit
diff --git a/tools/Makefile.am b/tools/Makefile.am
index a9dd8b9..4ca9131 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -1,7 +1,7 @@
# GF-Complete 'tools' AM file
AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
-AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
+AM_CFLAGS = -O3 -fPIC
bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time
diff --git a/tools/test_simd.sh b/tools/test_simd.sh
index 1b0e319..6401590 100755
--- a/tools/test_simd.sh
+++ b/tools/test_simd.sh
@@ -27,6 +27,16 @@ test_functions() {
return ${failed}
}
+# build with DEBUG_CPU_FUNCTIONS and print out CPU detection
+test_detection() {
+ failed=0
+
+ { ./configure && make clean && make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> ${results}; return 1; }
+ { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); }
+
+ return ${failed}
+}
+
compile_arm() {
failed=0
@@ -167,7 +177,7 @@ runtime_intel_flags() {
{ ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); }
done
- echo "====SSE2 support..." >> ${1}
+ echo "====SSE2 support..." >> ${1}
export ax_cv_have_sse_ext=no
export ax_cv_have_sse2_ext=yes
export ax_cv_have_sse3_ext=no
diff --git a/tools/test_simd_qemu.sh b/tools/test_simd_qemu.sh
index 7b2cb1c..5771874 100755
--- a/tools/test_simd_qemu.sh
+++ b/tools/test_simd_qemu.sh
@@ -224,6 +224,8 @@ run_test_simd_basic() {
{ run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
echo "=====running functions test"
{ run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
+ echo "=====running detection test"
+ { run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
echo "=====running runtime test"
{ run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
stop_qemu