From 22352ca094e242d7c93a2ed822f89a07eeb34c1a Mon Sep 17 00:00:00 2001 From: Bassam Tabbara Date: Fri, 2 Sep 2016 17:16:18 -0700 Subject: Remove generated autotools files from the build. Also update .gitignore to ignore some autotools files and tests. --- .gitignore | 7 + compile | 347 ------------------------ depcomp | 791 ------------------------------------------------------ m4/ltoptions.m4 | 384 -------------------------- m4/ltsugar.m4 | 123 --------- m4/lt~obsolete.m4 | 98 ------- 6 files changed, 7 insertions(+), 1743 deletions(-) delete mode 100755 compile delete mode 100755 depcomp delete mode 100644 m4/ltoptions.m4 delete mode 100644 m4/ltsugar.m4 delete mode 100644 m4/lt~obsolete.m4 diff --git a/.gitignore b/.gitignore index c455d23..f6f097d 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,10 @@ config.sub ltmain.sh m4/libtool.m4 m4/ltversion.m4 +m4/ltoptions.m4 +m4/ltsugar.m4 +m4/lt~obsolete.m4 +test-driver src/.dirstamp test-driver @@ -68,3 +72,6 @@ tools/gf_methods tools/gf_mult tools/gf_poly tools/gf_time +tools/gf_unit_w* +tools/test-suite.log + diff --git a/compile b/compile deleted file mode 100755 index 531136b..0000000 --- a/compile +++ /dev/null @@ -1,347 +0,0 @@ -#! /bin/sh -# Wrapper for compilers which do not understand '-c -o'. - -scriptversion=2012-10-14.11; # UTC - -# Copyright (C) 1999-2013 Free Software Foundation, Inc. -# Written by Tom Tromey . -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. 
If not, see . - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# This file is maintained in Automake, please report -# bugs to or send patches to -# . - -nl=' -' - -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent tools from complaining about whitespace usage. -IFS=" "" $nl" - -file_conv= - -# func_file_conv build_file lazy -# Convert a $build file to $host form and store it in $file -# Currently only supports Windows hosts. If the determined conversion -# type is listed in (the comma separated) LAZY, no conversion will -# take place. -func_file_conv () -{ - file=$1 - case $file in - / | /[!/]*) # absolute file, and not a UNC file - if test -z "$file_conv"; then - # lazily determine how to convert abs files - case `uname -s` in - MINGW*) - file_conv=mingw - ;; - CYGWIN*) - file_conv=cygwin - ;; - *) - file_conv=wine - ;; - esac - fi - case $file_conv/,$2, in - *,$file_conv,*) - ;; - mingw/*) - file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` - ;; - cygwin/*) - file=`cygpath -m "$file" || echo "$file"` - ;; - wine/*) - file=`winepath -w "$file" || echo "$file"` - ;; - esac - ;; - esac -} - -# func_cl_dashL linkdir -# Make cl look for libraries in LINKDIR -func_cl_dashL () -{ - func_file_conv "$1" - if test -z "$lib_path"; then - lib_path=$file - else - lib_path="$lib_path;$file" - fi - linker_opts="$linker_opts -LIBPATH:$file" -} - -# func_cl_dashl library -# Do a library search-path lookup for cl -func_cl_dashl () -{ - lib=$1 - found=no - save_IFS=$IFS - IFS=';' - for dir in $lib_path $LIB - do - IFS=$save_IFS - if $shared && test -f "$dir/$lib.dll.lib"; then - found=yes - lib=$dir/$lib.dll.lib - break - fi - if test -f "$dir/$lib.lib"; then - found=yes - lib=$dir/$lib.lib - break - fi - 
if test -f "$dir/lib$lib.a"; then - found=yes - lib=$dir/lib$lib.a - break - fi - done - IFS=$save_IFS - - if test "$found" != yes; then - lib=$lib.lib - fi -} - -# func_cl_wrapper cl arg... -# Adjust compile command to suit cl -func_cl_wrapper () -{ - # Assume a capable shell - lib_path= - shared=: - linker_opts= - for arg - do - if test -n "$eat"; then - eat= - else - case $1 in - -o) - # configure might choose to run compile as 'compile cc -o foo foo.c'. - eat=1 - case $2 in - *.o | *.[oO][bB][jJ]) - func_file_conv "$2" - set x "$@" -Fo"$file" - shift - ;; - *) - func_file_conv "$2" - set x "$@" -Fe"$file" - shift - ;; - esac - ;; - -I) - eat=1 - func_file_conv "$2" mingw - set x "$@" -I"$file" - shift - ;; - -I*) - func_file_conv "${1#-I}" mingw - set x "$@" -I"$file" - shift - ;; - -l) - eat=1 - func_cl_dashl "$2" - set x "$@" "$lib" - shift - ;; - -l*) - func_cl_dashl "${1#-l}" - set x "$@" "$lib" - shift - ;; - -L) - eat=1 - func_cl_dashL "$2" - ;; - -L*) - func_cl_dashL "${1#-L}" - ;; - -static) - shared=false - ;; - -Wl,*) - arg=${1#-Wl,} - save_ifs="$IFS"; IFS=',' - for flag in $arg; do - IFS="$save_ifs" - linker_opts="$linker_opts $flag" - done - IFS="$save_ifs" - ;; - -Xlinker) - eat=1 - linker_opts="$linker_opts $2" - ;; - -*) - set x "$@" "$1" - shift - ;; - *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) - func_file_conv "$1" - set x "$@" -Tp"$file" - shift - ;; - *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) - func_file_conv "$1" mingw - set x "$@" "$file" - shift - ;; - *) - set x "$@" "$1" - shift - ;; - esac - fi - shift - done - if test -n "$linker_opts"; then - linker_opts="-link$linker_opts" - fi - exec "$@" $linker_opts - exit 1 -} - -eat= - -case $1 in - '') - echo "$0: No command. Try '$0 --help' for more information." 1>&2 - exit 1; - ;; - -h | --h*) - cat <<\EOF -Usage: compile [--help] [--version] PROGRAM [ARGS] - -Wrapper for compilers which do not understand '-c -o'. 
-Remove '-o dest.o' from ARGS, run PROGRAM with the remaining -arguments, and rename the output as expected. - -If you are trying to build a whole package this is not the -right script to run: please start by reading the file 'INSTALL'. - -Report bugs to . -EOF - exit $? - ;; - -v | --v*) - echo "compile $scriptversion" - exit $? - ;; - cl | *[/\\]cl | cl.exe | *[/\\]cl.exe ) - func_cl_wrapper "$@" # Doesn't return... - ;; -esac - -ofile= -cfile= - -for arg -do - if test -n "$eat"; then - eat= - else - case $1 in - -o) - # configure might choose to run compile as 'compile cc -o foo foo.c'. - # So we strip '-o arg' only if arg is an object. - eat=1 - case $2 in - *.o | *.obj) - ofile=$2 - ;; - *) - set x "$@" -o "$2" - shift - ;; - esac - ;; - *.c) - cfile=$1 - set x "$@" "$1" - shift - ;; - *) - set x "$@" "$1" - shift - ;; - esac - fi - shift -done - -if test -z "$ofile" || test -z "$cfile"; then - # If no '-o' option was seen then we might have been invoked from a - # pattern rule where we don't need one. That is ok -- this is a - # normal compilation that the losing compiler can handle. If no - # '.c' file was seen then we are probably linking. That is also - # ok. - exec "$@" -fi - -# Name of file we expect compiler to create. -cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` - -# Create the lock directory. -# Note: use '[/\\:.-]' here to ensure that we don't use the same name -# that we are using for the .o file. Also, base the name on the expected -# object file name, since that is what matters with a parallel build. -lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d -while true; do - if mkdir "$lockdir" >/dev/null 2>&1; then - break - fi - sleep 1 -done -# FIXME: race condition here if user kills between mkdir and trap. -trap "rmdir '$lockdir'; exit 1" 1 2 15 - -# Run the compile. -"$@" -ret=$? 
- -if test -f "$cofile"; then - test "$cofile" = "$ofile" || mv "$cofile" "$ofile" -elif test -f "${cofile}bj"; then - test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" -fi - -rmdir "$lockdir" -exit $ret - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" -# time-stamp-end: "; # UTC" -# End: diff --git a/depcomp b/depcomp deleted file mode 100755 index 4ebd5b3..0000000 --- a/depcomp +++ /dev/null @@ -1,791 +0,0 @@ -#! /bin/sh -# depcomp - compile a program generating dependencies as side-effects - -scriptversion=2013-05-30.07; # UTC - -# Copyright (C) 1999-2013 Free Software Foundation, Inc. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Originally written by Alexandre Oliva . - -case $1 in - '') - echo "$0: No command. Try '$0 --help' for more information." 1>&2 - exit 1; - ;; - -h | --h*) - cat <<\EOF -Usage: depcomp [--help] [--version] PROGRAM [ARGS] - -Run PROGRAMS ARGS to compile a file, generating dependencies -as side-effects. 
- -Environment variables: - depmode Dependency tracking mode. - source Source file read by 'PROGRAMS ARGS'. - object Object file output by 'PROGRAMS ARGS'. - DEPDIR directory where to store dependencies. - depfile Dependency file to output. - tmpdepfile Temporary file to use when outputting dependencies. - libtool Whether libtool is used (yes/no). - -Report bugs to . -EOF - exit $? - ;; - -v | --v*) - echo "depcomp $scriptversion" - exit $? - ;; -esac - -# Get the directory component of the given path, and save it in the -# global variables '$dir'. Note that this directory component will -# be either empty or ending with a '/' character. This is deliberate. -set_dir_from () -{ - case $1 in - */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; - *) dir=;; - esac -} - -# Get the suffix-stripped basename of the given path, and save it the -# global variable '$base'. -set_base_from () -{ - base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` -} - -# If no dependency file was actually created by the compiler invocation, -# we still have to create a dummy depfile, to avoid errors with the -# Makefile "include basename.Plo" scheme. -make_dummy_depfile () -{ - echo "#dummy" > "$depfile" -} - -# Factor out some common post-processing of the generated depfile. -# Requires the auxiliary global variable '$tmpdepfile' to be set. -aix_post_process_depfile () -{ - # If the compiler actually managed to produce a dependency file, - # post-process it. - if test -f "$tmpdepfile"; then - # Each line is of the form 'foo.o: dependency.h'. - # Do two passes, one to just change these to - # $object: dependency.h - # and one to simply output - # dependency.h: - # which is needed to avoid the deleted-header problem. - { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" - sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" - } > "$depfile" - rm -f "$tmpdepfile" - else - make_dummy_depfile - fi -} - -# A tabulation character. -tab=' ' -# A newline character. 
-nl=' -' -# Character ranges might be problematic outside the C locale. -# These definitions help. -upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ -lower=abcdefghijklmnopqrstuvwxyz -digits=0123456789 -alpha=${upper}${lower} - -if test -z "$depmode" || test -z "$source" || test -z "$object"; then - echo "depcomp: Variables source, object and depmode must be set" 1>&2 - exit 1 -fi - -# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. -depfile=${depfile-`echo "$object" | - sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} -tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} - -rm -f "$tmpdepfile" - -# Avoid interferences from the environment. -gccflag= dashmflag= - -# Some modes work just like other modes, but use different flags. We -# parameterize here, but still list the modes in the big case below, -# to make depend.m4 easier to write. Note that we *cannot* use a case -# here, because this file can only contain one case statement. -if test "$depmode" = hp; then - # HP compiler uses -M and no extra arg. - gccflag=-M - depmode=gcc -fi - -if test "$depmode" = dashXmstdout; then - # This is just like dashmstdout with a different argument. - dashmflag=-xM - depmode=dashmstdout -fi - -cygpath_u="cygpath -u -f -" -if test "$depmode" = msvcmsys; then - # This is just like msvisualcpp but w/o cygpath translation. - # Just convert the backslash-escaped backslashes to single forward - # slashes to satisfy depend.m4 - cygpath_u='sed s,\\\\,/,g' - depmode=msvisualcpp -fi - -if test "$depmode" = msvc7msys; then - # This is just like msvc7 but w/o cygpath translation. - # Just convert the backslash-escaped backslashes to single forward - # slashes to satisfy depend.m4 - cygpath_u='sed s,\\\\,/,g' - depmode=msvc7 -fi - -if test "$depmode" = xlc; then - # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. 
- gccflag=-qmakedep=gcc,-MF - depmode=gcc -fi - -case "$depmode" in -gcc3) -## gcc 3 implements dependency tracking that does exactly what -## we want. Yay! Note: for some reason libtool 1.4 doesn't like -## it if -MD -MP comes after the -MF stuff. Hmm. -## Unfortunately, FreeBSD c89 acceptance of flags depends upon -## the command line argument order; so add the flags where they -## appear in depend2.am. Note that the slowdown incurred here -## affects only configure: in makefiles, %FASTDEP% shortcuts this. - for arg - do - case $arg in - -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; - *) set fnord "$@" "$arg" ;; - esac - shift # fnord - shift # $arg - done - "$@" - stat=$? - if test $stat -ne 0; then - rm -f "$tmpdepfile" - exit $stat - fi - mv "$tmpdepfile" "$depfile" - ;; - -gcc) -## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. -## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. -## (see the conditional assignment to $gccflag above). -## There are various ways to get dependency output from gcc. Here's -## why we pick this rather obscure method: -## - Don't want to use -MD because we'd like the dependencies to end -## up in a subdir. Having to rename by hand is ugly. -## (We might end up doing this anyway to support other compilers.) -## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like -## -MM, not -M (despite what the docs say). Also, it might not be -## supported by the other compilers which use the 'gcc' depmode. -## - Using -M directly means running the compiler twice (even worse -## than renaming). - if test -z "$gccflag"; then - gccflag=-MD, - fi - "$@" -Wp,"$gccflag$tmpdepfile" - stat=$? - if test $stat -ne 0; then - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - echo "$object : \\" > "$depfile" - # The second -e expression handles DOS-style file names with drive - # letters. 
- sed -e 's/^[^:]*: / /' \ - -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" -## This next piece of magic avoids the "deleted header file" problem. -## The problem is that when a header file which appears in a .P file -## is deleted, the dependency causes make to die (because there is -## typically no way to rebuild the header). We avoid this by adding -## dummy dependencies for each header file. Too bad gcc doesn't do -## this for us directly. -## Some versions of gcc put a space before the ':'. On the theory -## that the space means something, we add a space to the output as -## well. hp depmode also adds that space, but also prefixes the VPATH -## to the object. Take care to not repeat it in the output. -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - tr ' ' "$nl" < "$tmpdepfile" \ - | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ - | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -hp) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -sgi) - if test "$libtool" = yes; then - "$@" "-Wp,-MDupdate,$tmpdepfile" - else - "$@" -MDupdate "$tmpdepfile" - fi - stat=$? - if test $stat -ne 0; then - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - - if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files - echo "$object : \\" > "$depfile" - # Clip off the initial element (the dependent). Don't try to be - # clever and replace this with sed code, as IRIX sed won't handle - # lines with more than a fixed number of characters (4096 in - # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; - # the IRIX cc adds comments like '#:fec' to the end of the - # dependency line. 
- tr ' ' "$nl" < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ - | tr "$nl" ' ' >> "$depfile" - echo >> "$depfile" - # The second pass generates a dummy entry for each header file. - tr ' ' "$nl" < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ - >> "$depfile" - else - make_dummy_depfile - fi - rm -f "$tmpdepfile" - ;; - -xlc) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -aix) - # The C for AIX Compiler uses -M and outputs the dependencies - # in a .u file. In older versions, this file always lives in the - # current directory. Also, the AIX compiler puts '$object:' at the - # start of each line; $object doesn't have directory information. - # Version 6 uses the directory in both cases. - set_dir_from "$object" - set_base_from "$object" - if test "$libtool" = yes; then - tmpdepfile1=$dir$base.u - tmpdepfile2=$base.u - tmpdepfile3=$dir.libs/$base.u - "$@" -Wc,-M - else - tmpdepfile1=$dir$base.u - tmpdepfile2=$dir$base.u - tmpdepfile3=$dir$base.u - "$@" -M - fi - stat=$? - if test $stat -ne 0; then - rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" - exit $stat - fi - - for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" - do - test -f "$tmpdepfile" && break - done - aix_post_process_depfile - ;; - -tcc) - # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 - # FIXME: That version still under development at the moment of writing. - # Make that this statement remains true also for stable, released - # versions. - # It will wrap lines (doesn't matter whether long or short) with a - # trailing '\', as in: - # - # foo.o : \ - # foo.c \ - # foo.h \ - # - # It will put a trailing '\' even on the last line, and will use leading - # spaces rather than leading tabs (at least since its commit 0394caf7 - # "Emit spaces for -MD"). 
- "$@" -MD -MF "$tmpdepfile" - stat=$? - if test $stat -ne 0; then - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. - # We have to change lines of the first kind to '$object: \'. - sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" - # And for each line of the second kind, we have to emit a 'dep.h:' - # dummy dependency, to avoid the deleted-header problem. - sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" - rm -f "$tmpdepfile" - ;; - -## The order of this option in the case statement is important, since the -## shell code in configure will try each of these formats in the order -## listed in this file. A plain '-MD' option would be understood by many -## compilers, so we must ensure this comes after the gcc and icc options. -pgcc) - # Portland's C compiler understands '-MD'. - # Will always output deps to 'file.d' where file is the root name of the - # source file under compilation, even if file resides in a subdirectory. - # The object file name does not affect the name of the '.d' file. - # pgcc 10.2 will output - # foo.o: sub/foo.c sub/foo.h - # and will wrap long lines using '\' : - # foo.o: sub/foo.c ... \ - # sub/foo.h ... \ - # ... - set_dir_from "$object" - # Use the source, not the object, to determine the base name, since - # that's sadly what pgcc will do too. - set_base_from "$source" - tmpdepfile=$base.d - - # For projects that build the same source file twice into different object - # files, the pgcc approach of using the *source* file root name can cause - # problems in parallel builds. Use a locking strategy to avoid stomping on - # the same $tmpdepfile. - lockdir=$base.d-lock - trap " - echo '$0: caught signal, cleaning up...' >&2 - rmdir '$lockdir' - exit 1 - " 1 2 13 15 - numtries=100 - i=$numtries - while test $i -gt 0; do - # mkdir is a portable test-and-set. - if mkdir "$lockdir" 2>/dev/null; then - # This process acquired the lock. 
- "$@" -MD - stat=$? - # Release the lock. - rmdir "$lockdir" - break - else - # If the lock is being held by a different process, wait - # until the winning process is done or we timeout. - while test -d "$lockdir" && test $i -gt 0; do - sleep 1 - i=`expr $i - 1` - done - fi - i=`expr $i - 1` - done - trap - 1 2 13 15 - if test $i -le 0; then - echo "$0: failed to acquire lock after $numtries attempts" >&2 - echo "$0: check lockdir '$lockdir'" >&2 - exit 1 - fi - - if test $stat -ne 0; then - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - # Each line is of the form `foo.o: dependent.h', - # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" - # Some versions of the HPUX 10.20 sed can't process this invocation - # correctly. Breaking it into two sed invocations is a workaround. - sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ - | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -hp2) - # The "hp" stanza above does not work with aCC (C++) and HP's ia64 - # compilers, which have integrated preprocessors. The correct option - # to use with these is +Maked; it writes dependencies to a file named - # 'foo.d', which lands next to the object file, wherever that - # happens to be. - # Much of this is similar to the tru64 case; see comments there. - set_dir_from "$object" - set_base_from "$object" - if test "$libtool" = yes; then - tmpdepfile1=$dir$base.d - tmpdepfile2=$dir.libs/$base.d - "$@" -Wc,+Maked - else - tmpdepfile1=$dir$base.d - tmpdepfile2=$dir$base.d - "$@" +Maked - fi - stat=$? 
- if test $stat -ne 0; then - rm -f "$tmpdepfile1" "$tmpdepfile2" - exit $stat - fi - - for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" - do - test -f "$tmpdepfile" && break - done - if test -f "$tmpdepfile"; then - sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" - # Add 'dependent.h:' lines. - sed -ne '2,${ - s/^ *// - s/ \\*$// - s/$/:/ - p - }' "$tmpdepfile" >> "$depfile" - else - make_dummy_depfile - fi - rm -f "$tmpdepfile" "$tmpdepfile2" - ;; - -tru64) - # The Tru64 compiler uses -MD to generate dependencies as a side - # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. - # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put - # dependencies in 'foo.d' instead, so we check for that too. - # Subdirectories are respected. - set_dir_from "$object" - set_base_from "$object" - - if test "$libtool" = yes; then - # Libtool generates 2 separate objects for the 2 libraries. These - # two compilations output dependencies in $dir.libs/$base.o.d and - # in $dir$base.o.d. We have to check for both files, because - # one of the two compilations can be disabled. We should prefer - # $dir$base.o.d over $dir.libs/$base.o.d because the latter is - # automatically cleaned when .libs/ is deleted, while ignoring - # the former would cause a distcleancheck panic. - tmpdepfile1=$dir$base.o.d # libtool 1.5 - tmpdepfile2=$dir.libs/$base.o.d # Likewise. - tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 - "$@" -Wc,-MD - else - tmpdepfile1=$dir$base.d - tmpdepfile2=$dir$base.d - tmpdepfile3=$dir$base.d - "$@" -MD - fi - - stat=$? - if test $stat -ne 0; then - rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" - exit $stat - fi - - for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" - do - test -f "$tmpdepfile" && break - done - # Same post-processing that is required for AIX mode. 
- aix_post_process_depfile - ;; - -msvc7) - if test "$libtool" = yes; then - showIncludes=-Wc,-showIncludes - else - showIncludes=-showIncludes - fi - "$@" $showIncludes > "$tmpdepfile" - stat=$? - grep -v '^Note: including file: ' "$tmpdepfile" - if test $stat -ne 0; then - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - echo "$object : \\" > "$depfile" - # The first sed program below extracts the file names and escapes - # backslashes for cygpath. The second sed program outputs the file - # name when reading, but also accumulates all include files in the - # hold buffer in order to output them again at the end. This only - # works with sed implementations that can handle large buffers. - sed < "$tmpdepfile" -n ' -/^Note: including file: *\(.*\)/ { - s//\1/ - s/\\/\\\\/g - p -}' | $cygpath_u | sort -u | sed -n ' -s/ /\\ /g -s/\(.*\)/'"$tab"'\1 \\/p -s/.\(.*\) \\/\1:/ -H -$ { - s/.*/'"$tab"'/ - G - p -}' >> "$depfile" - echo >> "$depfile" # make sure the fragment doesn't end with a backslash - rm -f "$tmpdepfile" - ;; - -msvc7msys) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -#nosideeffect) - # This comment above is used by automake to tell side-effect - # dependency tracking mechanisms from slower ones. - -dashmstdout) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test "X$1" != 'X--mode=compile'; do - shift - done - shift - fi - - # Remove '-o $object'. 
- IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - test -z "$dashmflag" && dashmflag=-M - # Require at least two characters before searching for ':' - # in the target name. This is to cope with DOS-style filenames: - # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. - "$@" $dashmflag | - sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - # Some versions of the HPUX 10.20 sed can't process this sed invocation - # correctly. Breaking it into two sed invocations is a workaround. - tr ' ' "$nl" < "$tmpdepfile" \ - | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ - | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -dashXmstdout) - # This case only exists to satisfy depend.m4. It is never actually - # run, as this mode is specially recognized in the preamble. - exit 1 - ;; - -makedepend) - "$@" || exit $? - # Remove any Libtool call - if test "$libtool" = yes; then - while test "X$1" != 'X--mode=compile'; do - shift - done - shift - fi - # X makedepend - shift - cleared=no eat=no - for arg - do - case $cleared in - no) - set ""; shift - cleared=yes ;; - esac - if test $eat = yes; then - eat=no - continue - fi - case "$arg" in - -D*|-I*) - set fnord "$@" "$arg"; shift ;; - # Strip any option that makedepend may not understand. Remove - # the object too, otherwise makedepend will parse it as a source file. - -arch) - eat=yes ;; - -*|$object) - ;; - *) - set fnord "$@" "$arg"; shift ;; - esac - done - obj_suffix=`echo "$object" | sed 's/^.*\././'` - touch "$tmpdepfile" - ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" - rm -f "$depfile" - # makedepend may prepend the VPATH from the source file name to the object. - # No need to regex-escape $object, excess matching of '.' is harmless. 
- sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" - # Some versions of the HPUX 10.20 sed can't process the last invocation - # correctly. Breaking it into two sed invocations is a workaround. - sed '1,2d' "$tmpdepfile" \ - | tr ' ' "$nl" \ - | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ - | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" "$tmpdepfile".bak - ;; - -cpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test "X$1" != 'X--mode=compile'; do - shift - done - shift - fi - - # Remove '-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - "$@" -E \ - | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ - -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ - | sed '$ s: \\$::' > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - cat < "$tmpdepfile" >> "$depfile" - sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -msvisualcpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout. - "$@" || exit $? - - # Remove the call to Libtool. 
- if test "$libtool" = yes; then - while test "X$1" != 'X--mode=compile'; do - shift - done - shift - fi - - IFS=" " - for arg - do - case "$arg" in - -o) - shift - ;; - $object) - shift - ;; - "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") - set fnord "$@" - shift - shift - ;; - *) - set fnord "$@" "$arg" - shift - shift - ;; - esac - done - "$@" -E 2>/dev/null | - sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" - echo "$tab" >> "$depfile" - sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -msvcmsys) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -none) - exec "$@" - ;; - -*) - echo "Unknown depmode $depmode" 1>&2 - exit 1 - ;; -esac - -exit 0 - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" -# time-stamp-end: "; # UTC" -# End: diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4 deleted file mode 100644 index 5d9acd8..0000000 --- a/m4/ltoptions.m4 +++ /dev/null @@ -1,384 +0,0 @@ -# Helper functions for option handling. -*- Autoconf -*- -# -# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, -# Inc. -# Written by Gary V. Vaughan, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 7 ltoptions.m4 - -# This is to help aclocal find these macros, as it can't see m4_define. 
-AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) - - -# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) -# ------------------------------------------ -m4_define([_LT_MANGLE_OPTION], -[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) - - -# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) -# --------------------------------------- -# Set option OPTION-NAME for macro MACRO-NAME, and if there is a -# matching handler defined, dispatch to it. Other OPTION-NAMEs are -# saved as a flag. -m4_define([_LT_SET_OPTION], -[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl -m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), - _LT_MANGLE_DEFUN([$1], [$2]), - [m4_warning([Unknown $1 option `$2'])])[]dnl -]) - - -# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) -# ------------------------------------------------------------ -# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. -m4_define([_LT_IF_OPTION], -[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) - - -# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) -# ------------------------------------------------------- -# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME -# are set. -m4_define([_LT_UNLESS_OPTIONS], -[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), - [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), - [m4_define([$0_found])])])[]dnl -m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 -])[]dnl -]) - - -# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) -# ---------------------------------------- -# OPTION-LIST is a space-separated list of Libtool options associated -# with MACRO-NAME. If any OPTION has a matching handler declared with -# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about -# the unknown option and exit. 
-m4_defun([_LT_SET_OPTIONS], -[# Set options -m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), - [_LT_SET_OPTION([$1], _LT_Option)]) - -m4_if([$1],[LT_INIT],[ - dnl - dnl Simply set some default values (i.e off) if boolean options were not - dnl specified: - _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no - ]) - _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no - ]) - dnl - dnl If no reference was made to various pairs of opposing options, then - dnl we run the default mode handler for the pair. For example, if neither - dnl `shared' nor `disable-shared' was passed, we enable building of shared - dnl archives by default: - _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) - _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) - _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) - _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], - [_LT_ENABLE_FAST_INSTALL]) - ]) -])# _LT_SET_OPTIONS - - -## --------------------------------- ## -## Macros to handle LT_INIT options. 
## -## --------------------------------- ## - -# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) -# ----------------------------------------- -m4_define([_LT_MANGLE_DEFUN], -[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) - - -# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) -# ----------------------------------------------- -m4_define([LT_OPTION_DEFINE], -[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl -])# LT_OPTION_DEFINE - - -# dlopen -# ------ -LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes -]) - -AU_DEFUN([AC_LIBTOOL_DLOPEN], -[_LT_SET_OPTION([LT_INIT], [dlopen]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the `dlopen' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) - - -# win32-dll -# --------- -# Declare package support for building win32 dll's. -LT_OPTION_DEFINE([LT_INIT], [win32-dll], -[enable_win32_dll=yes - -case $host in -*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) - AC_CHECK_TOOL(AS, as, false) - AC_CHECK_TOOL(DLLTOOL, dlltool, false) - AC_CHECK_TOOL(OBJDUMP, objdump, false) - ;; -esac - -test -z "$AS" && AS=as -_LT_DECL([], [AS], [1], [Assembler program])dnl - -test -z "$DLLTOOL" && DLLTOOL=dlltool -_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl - -test -z "$OBJDUMP" && OBJDUMP=objdump -_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl -])# win32-dll - -AU_DEFUN([AC_LIBTOOL_WIN32_DLL], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -_LT_SET_OPTION([LT_INIT], [win32-dll]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the `win32-dll' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) - - -# _LT_ENABLE_SHARED([DEFAULT]) -# ---------------------------- -# implement the --enable-shared flag, and supports the `shared' and -# `disable-shared' LT_INIT 
options. -# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. -m4_define([_LT_ENABLE_SHARED], -[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([shared], - [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], - [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_shared=yes ;; - no) enable_shared=no ;; - *) - enable_shared=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for pkg in $enableval; do - IFS="$lt_save_ifs" - if test "X$pkg" = "X$p"; then - enable_shared=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) - - _LT_DECL([build_libtool_libs], [enable_shared], [0], - [Whether or not to build shared libraries]) -])# _LT_ENABLE_SHARED - -LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) - -# Old names: -AC_DEFUN([AC_ENABLE_SHARED], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) -]) - -AC_DEFUN([AC_DISABLE_SHARED], -[_LT_SET_OPTION([LT_INIT], [disable-shared]) -]) - -AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) -AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_ENABLE_SHARED], []) -dnl AC_DEFUN([AM_DISABLE_SHARED], []) - - - -# _LT_ENABLE_STATIC([DEFAULT]) -# ---------------------------- -# implement the --enable-static flag, and support the `static' and -# `disable-static' LT_INIT options. -# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
-m4_define([_LT_ENABLE_STATIC], -[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([static], - [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], - [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_static=yes ;; - no) enable_static=no ;; - *) - enable_static=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for pkg in $enableval; do - IFS="$lt_save_ifs" - if test "X$pkg" = "X$p"; then - enable_static=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [enable_static=]_LT_ENABLE_STATIC_DEFAULT) - - _LT_DECL([build_old_libs], [enable_static], [0], - [Whether or not to build static libraries]) -])# _LT_ENABLE_STATIC - -LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) - -# Old names: -AC_DEFUN([AC_ENABLE_STATIC], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) -]) - -AC_DEFUN([AC_DISABLE_STATIC], -[_LT_SET_OPTION([LT_INIT], [disable-static]) -]) - -AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) -AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_ENABLE_STATIC], []) -dnl AC_DEFUN([AM_DISABLE_STATIC], []) - - - -# _LT_ENABLE_FAST_INSTALL([DEFAULT]) -# ---------------------------------- -# implement the --enable-fast-install flag, and support the `fast-install' -# and `disable-fast-install' LT_INIT options. -# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
-m4_define([_LT_ENABLE_FAST_INSTALL], -[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([fast-install], - [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], - [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_fast_install=yes ;; - no) enable_fast_install=no ;; - *) - enable_fast_install=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for pkg in $enableval; do - IFS="$lt_save_ifs" - if test "X$pkg" = "X$p"; then - enable_fast_install=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) - -_LT_DECL([fast_install], [enable_fast_install], [0], - [Whether or not to optimize for fast installation])dnl -])# _LT_ENABLE_FAST_INSTALL - -LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) - -# Old names: -AU_DEFUN([AC_ENABLE_FAST_INSTALL], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you put -the `fast-install' option into LT_INIT's first parameter.]) -]) - -AU_DEFUN([AC_DISABLE_FAST_INSTALL], -[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you put -the `disable-fast-install' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) -dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) - - -# _LT_WITH_PIC([MODE]) -# -------------------- -# implement the --with-pic flag, and support the `pic-only' and `no-pic' -# LT_INIT options. -# MODE is either `yes' or `no'. If omitted, it defaults to `both'. 
-m4_define([_LT_WITH_PIC], -[AC_ARG_WITH([pic], - [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], - [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], - [lt_p=${PACKAGE-default} - case $withval in - yes|no) pic_mode=$withval ;; - *) - pic_mode=default - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for lt_pkg in $withval; do - IFS="$lt_save_ifs" - if test "X$lt_pkg" = "X$lt_p"; then - pic_mode=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [pic_mode=default]) - -test -z "$pic_mode" && pic_mode=m4_default([$1], [default]) - -_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl -])# _LT_WITH_PIC - -LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) -LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) - -# Old name: -AU_DEFUN([AC_LIBTOOL_PICMODE], -[_LT_SET_OPTION([LT_INIT], [pic-only]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the `pic-only' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) - -## ----------------- ## -## LTDL_INIT Options ## -## ----------------- ## - -m4_define([_LTDL_MODE], []) -LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], - [m4_define([_LTDL_MODE], [nonrecursive])]) -LT_OPTION_DEFINE([LTDL_INIT], [recursive], - [m4_define([_LTDL_MODE], [recursive])]) -LT_OPTION_DEFINE([LTDL_INIT], [subproject], - [m4_define([_LTDL_MODE], [subproject])]) - -m4_define([_LTDL_TYPE], []) -LT_OPTION_DEFINE([LTDL_INIT], [installable], - [m4_define([_LTDL_TYPE], [installable])]) -LT_OPTION_DEFINE([LTDL_INIT], [convenience], - [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4 deleted file mode 100644 index 9000a05..0000000 --- a/m4/ltsugar.m4 +++ /dev/null @@ -1,123 +0,0 @@ -# ltsugar.m4 -- libtool m4 base layer. 
-*-Autoconf-*- -# -# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. -# Written by Gary V. Vaughan, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 6 ltsugar.m4 - -# This is to help aclocal find these macros, as it can't see m4_define. -AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) - - -# lt_join(SEP, ARG1, [ARG2...]) -# ----------------------------- -# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their -# associated separator. -# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier -# versions in m4sugar had bugs. -m4_define([lt_join], -[m4_if([$#], [1], [], - [$#], [2], [[$2]], - [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) -m4_define([_lt_join], -[m4_if([$#$2], [2], [], - [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) - - -# lt_car(LIST) -# lt_cdr(LIST) -# ------------ -# Manipulate m4 lists. -# These macros are necessary as long as will still need to support -# Autoconf-2.59 which quotes differently. -m4_define([lt_car], [[$1]]) -m4_define([lt_cdr], -[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], - [$#], 1, [], - [m4_dquote(m4_shift($@))])]) -m4_define([lt_unquote], $1) - - -# lt_append(MACRO-NAME, STRING, [SEPARATOR]) -# ------------------------------------------ -# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. -# Note that neither SEPARATOR nor STRING are expanded; they are appended -# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). -# No SEPARATOR is output if MACRO-NAME was previously undefined (different -# than defined and empty). -# -# This macro is needed until we can rely on Autoconf 2.62, since earlier -# versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
-m4_define([lt_append], -[m4_define([$1], - m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) - - - -# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) -# ---------------------------------------------------------- -# Produce a SEP delimited list of all paired combinations of elements of -# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list -# has the form PREFIXmINFIXSUFFIXn. -# Needed until we can rely on m4_combine added in Autoconf 2.62. -m4_define([lt_combine], -[m4_if(m4_eval([$# > 3]), [1], - [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl -[[m4_foreach([_Lt_prefix], [$2], - [m4_foreach([_Lt_suffix], - ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, - [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) - - -# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) -# ----------------------------------------------------------------------- -# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited -# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
-m4_define([lt_if_append_uniq], -[m4_ifdef([$1], - [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], - [lt_append([$1], [$2], [$3])$4], - [$5])], - [lt_append([$1], [$2], [$3])$4])]) - - -# lt_dict_add(DICT, KEY, VALUE) -# ----------------------------- -m4_define([lt_dict_add], -[m4_define([$1($2)], [$3])]) - - -# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) -# -------------------------------------------- -m4_define([lt_dict_add_subkey], -[m4_define([$1($2:$3)], [$4])]) - - -# lt_dict_fetch(DICT, KEY, [SUBKEY]) -# ---------------------------------- -m4_define([lt_dict_fetch], -[m4_ifval([$3], - m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), - m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) - - -# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) -# ----------------------------------------------------------------- -m4_define([lt_if_dict_fetch], -[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], - [$5], - [$6])]) - - -# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) -# -------------------------------------------------------------- -m4_define([lt_dict_filter], -[m4_if([$5], [], [], - [lt_join(m4_quote(m4_default([$4], [[, ]])), - lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), - [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl -]) diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4 deleted file mode 100644 index c573da9..0000000 --- a/m4/lt~obsolete.m4 +++ /dev/null @@ -1,98 +0,0 @@ -# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- -# -# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. -# Written by Scott James Remnant, 2004. -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 5 lt~obsolete.m4 - -# These exist entirely to fool aclocal when bootstrapping libtool. 
-# -# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) -# which have later been changed to m4_define as they aren't part of the -# exported API, or moved to Autoconf or Automake where they belong. -# -# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN -# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us -# using a macro with the same name in our local m4/libtool.m4 it'll -# pull the old libtool.m4 in (it doesn't see our shiny new m4_define -# and doesn't know about Autoconf macros at all.) -# -# So we provide this file, which has a silly filename so it's always -# included after everything else. This provides aclocal with the -# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything -# because those macros already exist, or will be overwritten later. -# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. -# -# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. -# Yes, that means every name once taken will need to remain here until -# we give up compatibility with versions before 1.7, at which point -# we need to keep only those names which we still refer to. - -# This is to help aclocal find these macros, as it can't see m4_define. 
-AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) - -m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) -m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) -m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) -m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) -m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) -m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) -m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) -m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) -m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) -m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) -m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) -m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) -m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) -m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) -m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) -m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) -m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) -m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) -m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) -m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) -m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) -m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) -m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) -m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) -m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) -m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) -m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], 
[AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) -m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) -m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) -m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) -m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) -m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) -m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) -m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) -m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) -m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) -m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) -m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) -m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) -m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) -m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) -m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) -m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) -m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) -m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) -m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) -m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) -m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) -m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) -m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) 
-m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) -m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) -m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) -m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) -m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) -m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) -m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) -- cgit v1.2.1 From 87f0d4395dbfe0ae559e964668b71f85819378a0 Mon Sep 17 00:00:00 2001 From: Bassam Tabbara Date: Fri, 2 Sep 2016 17:19:04 -0700 Subject: Add support for printing functions selected in gf_init There is currently no way to figure out which functions were selected during gf_init and as a result of SIMD options. This is not even possible in gdb since most functions are static. This commit adds a new macro SET_FUNCTION that records the name of the function selected during init inside the gf_internal structure. This macro only works when DEBUG_FUNCTIONS is defined during compile. Otherwise the code works exactly as it did before this change. The names of selected functions will be used during testing of SIMD runtime detection. All calls such as: gf->multiply.w32 = gf_w16_shift_multiply; need to be replaced with the following: SET_FUNCTION(gf,multiply,w32,gf_w16_shift_multiply) Also added a new flag to tools/gf_methods that will print the names of functions selected during gf_init. 
--- include/gf_int.h | 16 ++++++ src/gf_w128.c | 76 ++++++++++++++-------------- src/gf_w16.c | 122 ++++++++++++++++++++++---------------------- src/gf_w32.c | 134 ++++++++++++++++++++++++------------------------- src/gf_w4.c | 94 +++++++++++++++++----------------- src/gf_w64.c | 112 ++++++++++++++++++++--------------------- src/gf_w8.c | 132 ++++++++++++++++++++++++------------------------ src/gf_wgen.c | 64 +++++++++++------------ src/neon/gf_w16_neon.c | 4 +- src/neon/gf_w32_neon.c | 4 +- src/neon/gf_w4_neon.c | 6 +-- src/neon/gf_w64_neon.c | 4 +- src/neon/gf_w8_neon.c | 14 +++--- tools/gf_methods.c | 24 +++++++-- 14 files changed, 420 insertions(+), 386 deletions(-) diff --git a/include/gf_int.h b/include/gf_int.h index 32866f4..0356920 100644 --- a/include/gf_int.h +++ b/include/gf_int.h @@ -30,8 +30,24 @@ typedef struct { int arg2; gf_t *base_gf; void *private; +#ifdef DEBUG_FUNCTIONS + const char *multiply; + const char *divide; + const char *inverse; + const char *multiply_region; + const char *extract_word; +#endif } gf_internal_t; +#ifdef DEBUG_FUNCTIONS +#define SET_FUNCTION(gf,method,size,func) \ + { (gf)->method.size = (func); \ + ((gf_internal_t*)(gf)->scratch)->method = #func; } +#else +#define SET_FUNCTION(gf,method,size,func) \ + (gf)->method.size = (func); +#endif + extern int gf_w4_init (gf_t *gf); extern int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2); diff --git a/src/gf_w128.c b/src/gf_w128.c index b1e3d92..5f650b3 100644 --- a/src/gf_w128.c +++ b/src/gf_w128.c @@ -1405,14 +1405,14 @@ int gf_w128_composite_init(gf_t *gf) gf_internal_t *h = (gf_internal_t *) gf->scratch; if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w128 = gf_w128_composite_multiply_region_alt; + SET_FUNCTION(gf,multiply_region,w128,gf_w128_composite_multiply_region_alt) } else { - gf->multiply_region.w128 = gf_w128_composite_multiply_region; + SET_FUNCTION(gf,multiply_region,w128,gf_w128_composite_multiply_region) } 
- gf->multiply.w128 = gf_w128_composite_multiply; - gf->divide.w128 = gf_w128_divide_from_inverse; - gf->inverse.w128 = gf_w128_composite_inverse; + SET_FUNCTION(gf,multiply,w128,gf_w128_composite_multiply) + SET_FUNCTION(gf,divide,w128,gf_w128_divide_from_inverse) + SET_FUNCTION(gf,inverse,w128,gf_w128_composite_inverse) return 1; } @@ -1421,9 +1421,9 @@ static int gf_w128_cfm_init(gf_t *gf) { #if defined(INTEL_SSE4_PCLMUL) - gf->inverse.w128 = gf_w128_euclid; - gf->multiply.w128 = gf_w128_clm_multiply; - gf->multiply_region.w128 = gf_w128_clm_multiply_region_from_single; + SET_FUNCTION(gf,inverse,w128,gf_w128_euclid) + SET_FUNCTION(gf,multiply,w128,gf_w128_clm_multiply) + SET_FUNCTION(gf,multiply_region,w128,gf_w128_clm_multiply_region_from_single) return 1; #endif @@ -1433,9 +1433,9 @@ int gf_w128_cfm_init(gf_t *gf) static int gf_w128_shift_init(gf_t *gf) { - gf->multiply.w128 = gf_w128_shift_multiply; - gf->inverse.w128 = gf_w128_euclid; - gf->multiply_region.w128 = gf_w128_multiply_region_from_single; + SET_FUNCTION(gf,multiply,w128,gf_w128_shift_multiply) + SET_FUNCTION(gf,inverse,w128,gf_w128_euclid) + SET_FUNCTION(gf,multiply_region,w128,gf_w128_multiply_region_from_single) return 1; } @@ -1446,16 +1446,16 @@ int gf_w128_bytwo_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w128 = gf_w128_bytwo_p_multiply; - /*gf->multiply.w128 = gf_w128_sse_bytwo_p_multiply;*/ + SET_FUNCTION(gf,multiply,w128,gf_w128_bytwo_p_multiply) + /*SET_FUNCTION(gf,multiply,w128,gf_w128_sse_bytwo_p_multiply)*/ /* John: the sse function is slower.*/ } else { - gf->multiply.w128 = gf_w128_bytwo_b_multiply; - /*gf->multiply.w128 = gf_w128_sse_bytwo_b_multiply; + SET_FUNCTION(gf,multiply,w128,gf_w128_bytwo_b_multiply) + /*SET_FUNCTION(gf,multiply,w128,gf_w128_sse_bytwo_b_multiply) Ben: This sse function is also slower. 
*/ } - gf->inverse.w128 = gf_w128_euclid; - gf->multiply_region.w128 = gf_w128_bytwo_b_multiply_region; + SET_FUNCTION(gf,inverse,w128,gf_w128_euclid) + SET_FUNCTION(gf,multiply_region,w128,gf_w128_bytwo_b_multiply_region) return 1; } @@ -1525,20 +1525,20 @@ int gf_w128_split_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; - gf->multiply.w128 = gf_w128_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w128,gf_w128_bytwo_p_multiply) #if defined(INTEL_SSE4_PCLMUL) if (!(h->region_type & GF_REGION_NOSIMD)){ - gf->multiply.w128 = gf_w128_clm_multiply; + SET_FUNCTION(gf,multiply,w128,gf_w128_clm_multiply) } #endif - gf->inverse.w128 = gf_w128_euclid; + SET_FUNCTION(gf,inverse,w128,gf_w128_euclid) if ((h->arg1 != 4 && h->arg2 != 4) || h->mult_type == GF_MULT_DEFAULT) { sd8 = (struct gf_w128_split_8_128_data *) h->private; sd8->last_value[0] = 0; sd8->last_value[1] = 0; - gf->multiply_region.w128 = gf_w128_split_8_128_multiply_region; + SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_8_128_multiply_region) } else { sd4 = (struct gf_w128_split_4_128_data *) h->private; sd4->last_value[0] = 0; @@ -1547,7 +1547,7 @@ int gf_w128_split_init(gf_t *gf) { #ifdef INTEL_SSE4 if(!(h->region_type & GF_REGION_NOSIMD)) - gf->multiply_region.w128 = gf_w128_split_4_128_sse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_sse_altmap_multiply_region) else return 0; #else @@ -1557,11 +1557,11 @@ int gf_w128_split_init(gf_t *gf) else { #ifdef INTEL_SSE4 if(!(h->region_type & GF_REGION_NOSIMD)) - gf->multiply_region.w128 = gf_w128_split_4_128_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_sse_multiply_region) else - gf->multiply_region.w128 = gf_w128_split_4_128_multiply_region; + SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_multiply_region) #else - gf->multiply_region.w128 = gf_w128_split_4_128_multiply_region; + SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_multiply_region) #endif } } @@ -1586,9 
+1586,9 @@ int gf_w128_group_init(gf_t *gf) gt->m_table[2] = 0; gt->m_table[3] = 0; - gf->multiply.w128 = gf_w128_group_multiply; - gf->inverse.w128 = gf_w128_euclid; - gf->multiply_region.w128 = gf_w128_group_multiply_region; + SET_FUNCTION(gf,multiply,w128,gf_w128_group_multiply) + SET_FUNCTION(gf,inverse,w128,gf_w128_euclid) + SET_FUNCTION(gf,multiply_region,w128,gf_w128_group_multiply_region) gf_w128_group_r_init(gf); @@ -1738,10 +1738,10 @@ int gf_w128_init(gf_t *gf) } } - gf->multiply.w128 = NULL; - gf->divide.w128 = NULL; - gf->inverse.w128 = NULL; - gf->multiply_region.w128 = NULL; + SET_FUNCTION(gf,multiply,w128,NULL) + SET_FUNCTION(gf,divide,w128,NULL) + SET_FUNCTION(gf,inverse,w128,NULL) + SET_FUNCTION(gf,multiply_region,w128,NULL) switch(h->mult_type) { case GF_MULT_BYTWO_p: case GF_MULT_BYTWO_b: if (gf_w128_bytwo_init(gf) == 0) return 0; break; @@ -1757,22 +1757,22 @@ int gf_w128_init(gf_t *gf) /* Ben: Used to be h->region_type == GF_REGION_ALTMAP, but failed since there are multiple flags in h->region_type */ if (h->mult_type == GF_MULT_SPLIT_TABLE && (h->region_type & GF_REGION_ALTMAP)) { - gf->extract_word.w128 = gf_w128_split_extract_word; + SET_FUNCTION(gf,extract_word,w128,gf_w128_split_extract_word) } else if (h->mult_type == GF_MULT_COMPOSITE && h->region_type == GF_REGION_ALTMAP) { - gf->extract_word.w128 = gf_w128_composite_extract_word; + SET_FUNCTION(gf,extract_word,w128,gf_w128_composite_extract_word) } else { - gf->extract_word.w128 = gf_w128_extract_word; + SET_FUNCTION(gf,extract_word,w128,gf_w128_extract_word) } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w128 = gf_w128_divide_from_inverse; + SET_FUNCTION(gf,divide,w128,gf_w128_divide_from_inverse) } if (gf->inverse.w128 != NULL && gf->divide.w128 == NULL) { - gf->divide.w128 = gf_w128_divide_from_inverse; + SET_FUNCTION(gf,divide,w128,gf_w128_divide_from_inverse) } if (gf->inverse.w128 == NULL && gf->divide.w128 != NULL) { - gf->inverse.w128 = gf_w128_inverse_from_divide; + 
SET_FUNCTION(gf,inverse,w128,gf_w128_inverse_from_divide) } return 1; } diff --git a/src/gf_w16.c b/src/gf_w16.c index 4e026b2..a62ea51 100644 --- a/src/gf_w16.c +++ b/src/gf_w16.c @@ -548,7 +548,7 @@ gf_w16_shift_multiply (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) static int gf_w16_shift_init(gf_t *gf) { - gf->multiply.w32 = gf_w16_shift_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_shift_multiply) return 1; } @@ -563,14 +563,14 @@ int gf_w16_cfm_init(gf_t *gf) /*Ben: Determining how many reductions to do */ if ((0xfe00 & h->prim_poly) == 0) { - gf->multiply.w32 = gf_w16_clm_multiply_2; - gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2; + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_2) } else if((0xf000 & h->prim_poly) == 0) { - gf->multiply.w32 = gf_w16_clm_multiply_3; - gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_3; + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_3) } else if ((0xe000 & h->prim_poly) == 0) { - gf->multiply.w32 = gf_w16_clm_multiply_4; - gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_4; + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_4) } else { return 0; } @@ -705,10 +705,10 @@ int gf_w16_log_init(gf_t *gf) ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; } - gf->inverse.w32 = gf_w16_log_inverse; - gf->divide.w32 = gf_w16_log_divide; - gf->multiply.w32 = gf_w16_log_multiply; - gf->multiply_region.w32 = gf_w16_log_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w16_log_inverse) + SET_FUNCTION(gf,divide,w32,gf_w16_log_divide) + SET_FUNCTION(gf,multiply,w32,gf_w16_log_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_log_multiply_region) return 1; } @@ -1260,8 +1260,8 @@ int gf_w16_split_init(gf_t *gf) } for (i = 
0; i < 8; i++) basep = GF_MULTBY_TWO(basep); } - gf->multiply.w32 = gf_w16_split_8_8_multiply; - gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w16_split_8_8_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) return 1; } @@ -1274,34 +1274,34 @@ int gf_w16_split_init(gf_t *gf) /* Defaults */ if (issse3) { - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_multiply_region) } else if (isneon) { #ifdef ARM_NEON gf_w16_neon_split_init(gf); #endif } else { - gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) } if ((h->arg1 == 8 && h->arg2 == 16) || (h->arg2 == 8 && h->arg1 == 16)) { - gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) } else if ((h->arg1 == 4 && h->arg2 == 16) || (h->arg2 == 4 && h->arg1 == 16)) { if (issse3 || isneon) { if(h->region_type & GF_REGION_ALTMAP && h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region) else if(h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region) else if(h->region_type & GF_REGION_ALTMAP && issse3) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_altmap_multiply_region) } else { if(h->region_type & GF_REGION_SIMD) return 0; else if(h->region_type & GF_REGION_ALTMAP) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region; + 
SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region) else - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region) } } @@ -1313,7 +1313,7 @@ int gf_w16_table_init(gf_t *gf) { gf_w16_log_init(gf); - gf->multiply_region.w32 = gf_w16_table_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_table_lazy_multiply_region) return 1; } @@ -1844,26 +1844,26 @@ int gf_w16_bytwo_init(gf_t *gf) } if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w32 = gf_w16_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_p_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_nosse_multiply_region) else - gf->multiply_region.w32 = gf_w16_bytwo_p_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { - gf->multiply.w32 = gf_w16_bytwo_b_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_b_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_nosse_multiply_region) else - gf->multiply_region.w32 = gf_w16_bytwo_b_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif @@ -1904,10 +1904,10 @@ int gf_w16_log_zero_init(gf_t *gf) ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; } - 
gf->inverse.w32 = gf_w16_log_zero_inverse; - gf->divide.w32 = gf_w16_log_zero_divide; - gf->multiply.w32 = gf_w16_log_zero_multiply; - gf->multiply_region.w32 = gf_w16_log_zero_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w16_log_zero_inverse) + SET_FUNCTION(gf,divide,w32,gf_w16_log_zero_divide) + SET_FUNCTION(gf,multiply,w32,gf_w16_log_zero_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_log_zero_multiply_region) return 1; } @@ -2145,18 +2145,18 @@ int gf_w16_composite_init(gf_t *gf) cd->mult_table = gf_w8_get_mult_table(h->base_gf); if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w16_composite_multiply_region_alt; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_composite_multiply_region_alt) } else { - gf->multiply_region.w32 = gf_w16_composite_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_composite_multiply_region) } if (cd->mult_table == NULL) { - gf->multiply.w32 = gf_w16_composite_multiply_recursive; + SET_FUNCTION(gf,multiply,w32,gf_w16_composite_multiply_recursive) } else { - gf->multiply.w32 = gf_w16_composite_multiply_inline; + SET_FUNCTION(gf,multiply,w32,gf_w16_composite_multiply_inline) } - gf->divide.w32 = NULL; - gf->inverse.w32 = gf_w16_composite_inverse; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,gf_w16_composite_inverse) return 1; } @@ -2277,10 +2277,10 @@ int gf_w16_group_init(gf_t *gf) d44->reduce[p>>16] = (p&0xffff); } - gf->multiply.w32 = gf_w16_group_4_4_multiply; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = gf_w16_group_4_4_region_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_group_4_4_multiply) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_group_4_4_region_multiply) return 1; } @@ -2360,10 +2360,10 @@ int gf_w16_init(gf_t *gf) if (h->mult_type != GF_MULT_COMPOSITE) h->prim_poly |= (1 << 16); - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = 
NULL; - gf->multiply_region.w32 = NULL; + SET_FUNCTION(gf,multiply,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,NULL) switch(h->mult_type) { case GF_MULT_LOG_ZERO: if (gf_w16_log_zero_init(gf) == 0) return 0; break; @@ -2380,34 +2380,34 @@ int gf_w16_init(gf_t *gf) default: return 0; } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_w16_divide_from_inverse; - gf->inverse.w32 = gf_w16_euclid; + SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w16_euclid) } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_w16_divide_from_inverse; - gf->inverse.w32 = gf_w16_matrix; + SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w16_matrix) } if (gf->divide.w32 == NULL) { - gf->divide.w32 = gf_w16_divide_from_inverse; - if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w16_euclid; + SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse) + if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w16_euclid) } - if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w16_inverse_from_divide; + if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w16_inverse_from_divide) if (h->region_type & GF_REGION_ALTMAP) { if (h->mult_type == GF_MULT_COMPOSITE) { - gf->extract_word.w32 = gf_w16_composite_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w16_composite_extract_word) } else { - gf->extract_word.w32 = gf_w16_split_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w16_split_extract_word) } } else if (h->region_type == GF_REGION_CAUCHY) { - gf->multiply_region.w32 = gf_wgen_cauchy_region; - gf->extract_word.w32 = gf_wgen_extract_word; + SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region) + SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word) } else { - gf->extract_word.w32 = gf_w16_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w16_extract_word) } if (gf->multiply_region.w32 == 
NULL) { - gf->multiply_region.w32 = gf_w16_multiply_region_from_single; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_multiply_region_from_single) } return 1; } diff --git a/src/gf_w32.c b/src/gf_w32.c index 854a6e4..d496c3a 100644 --- a/src/gf_w32.c +++ b/src/gf_w32.c @@ -589,15 +589,15 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32) static int gf_w32_cfmgk_init(gf_t *gf) { - gf->inverse.w32 = gf_w32_euclid; - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single) #if defined(INTEL_SSE4_PCLMUL) gf_internal_t *h; h = (gf_internal_t *) gf->scratch; - gf->multiply.w32 = gf_w32_cfmgk_multiply; - gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single; + SET_FUNCTION(gf,multiply,w32,gf_w32_cfmgk_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_cfmgk_multiply_region_from_single) uint64_t *q_plus = (uint64_t *) h->private; uint64_t *g_star = (uint64_t *) h->private + 1; @@ -624,8 +624,8 @@ int gf_w32_cfmgk_init(gf_t *gf) static int gf_w32_cfm_init(gf_t *gf) { - gf->inverse.w32 = gf_w32_euclid; - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single) /*Ben: We also check to see if the prim poly will work for pclmul */ /*Ben: Check to see how many reduction steps it will take*/ @@ -636,14 +636,14 @@ int gf_w32_cfm_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; if ((0xfffe0000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_2; - gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_2) }else if ((0xffc00000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_3; - gf->multiply_region.w32 = 
gf_w32_clm_multiply_region_from_single_3; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_3) }else if ((0xfe000000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_4; - gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_4; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_4) } else { return 0; } @@ -656,9 +656,9 @@ int gf_w32_cfm_init(gf_t *gf) static int gf_w32_shift_init(gf_t *gf) { - gf->inverse.w32 = gf_w32_euclid; - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; - gf->multiply.w32 = gf_w32_shift_multiply; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single) + SET_FUNCTION(gf,multiply,w32,gf_w32_shift_multiply) return 1; } @@ -1380,32 +1380,32 @@ int gf_w32_bytwo_init(gf_t *gf) } if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w32 = gf_w32_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_nosse_multiply_region) else - gf->multiply_region.w32 = gf_w32_bytwo_p_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { - gf->multiply.w32 = gf_w32_bytwo_b_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_b_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_nosse_multiply_region) else - gf->multiply_region.w32 = 
gf_w32_bytwo_b_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif } - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) return 1; } @@ -2252,7 +2252,7 @@ int gf_w32_split_init(gf_t *gf) /* Defaults */ - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) /* JSP: First handle single multiplication: If args == 8, then we're doing split 8 8. @@ -2261,17 +2261,17 @@ int gf_w32_split_init(gf_t *gf) */ if (h->arg1 == 8 && h->arg2 == 8) { - gf->multiply.w32 = gf_w32_split_8_8_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w32_split_8_8_multiply) } else if (ispclmul) { if ((0xfffe0000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_2; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2) } else if ((0xffc00000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_3; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3) } else if ((0xfe000000 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w32_clm_multiply_4; + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4) } } else { - gf->multiply.w32 = gf_w32_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply) } /* Easy cases: 16/32 and 2/32 */ @@ -2279,7 +2279,7 @@ int gf_w32_split_init(gf_t *gf) if ((h->arg1 == 16 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 16)) { d16 = (struct gf_split_16_32_lazy_data *) h->private; d16->last_value = 0; - gf->multiply_region.w32 = gf_w32_split_16_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_16_32_lazy_multiply_region) return 1; } @@ -2288,11 +2288,11 @@ int gf_w32_split_init(gf_t *gf) ld2->last_value = 0; #ifdef INTEL_SSSE3 if (!(h->region_type & GF_REGION_NOSIMD)) - gf->multiply_region.w32 = 
gf_w32_split_2_32_lazy_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_sse_multiply_region) else - gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_multiply_region) #else - gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif return 1; @@ -2305,15 +2305,15 @@ int gf_w32_split_init(gf_t *gf) ld4 = (struct gf_split_4_32_lazy_data *) h->private; ld4->last_value = 0; if ((h->region_type & GF_REGION_NOSIMD) || !(issse3 || isneon)) { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_multiply_region) } else if (isneon) { #ifdef ARM_NEON gf_w32_neon_split_init(gf); #endif } else if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_altmap_multiply_region) } else { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_multiply_region) } return 1; } @@ -2324,7 +2324,7 @@ int gf_w32_split_init(gf_t *gf) h->mult_type == GF_MULT_DEFAULT) { d32 = (struct gf_split_8_32_lazy_data *) h->private; d32->last_value = 0; - gf->multiply_region.w32 = gf_w32_split_8_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_8_32_lazy_multiply_region) return 1; } @@ -2333,8 +2333,8 @@ int gf_w32_split_init(gf_t *gf) if (h->arg1 == 8 && h->arg2 == 8) { d8 = (struct gf_w32_split_8_8_data *) h->private; d8->last_value = 0; - gf->multiply.w32 = gf_w32_split_8_8_multiply; - gf->multiply_region.w32 = gf_w32_split_8_32_lazy_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w32_split_8_8_multiply) + 
SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_8_32_lazy_multiply_region) basep = 1; for (exp = 0; exp < 7; exp++) { for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0; @@ -2407,14 +2407,14 @@ int gf_w32_group_init(gf_t *gf) } if (g_s == g_r) { - gf->multiply.w32 = gf_w32_group_s_equals_r_multiply; - gf->multiply_region.w32 = gf_w32_group_s_equals_r_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w32_group_s_equals_r_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_group_s_equals_r_multiply_region) } else { - gf->multiply.w32 = gf_w32_group_multiply; - gf->multiply_region.w32 = gf_w32_group_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w32_group_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_group_multiply_region) } - gf->divide.w32 = NULL; - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) return 1; } @@ -2666,18 +2666,18 @@ int gf_w32_composite_init(gf_t *gf) cd->alog = gf_w16_get_mult_alog_table(h->base_gf); if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_composite_multiply_region_alt) } else { - gf->multiply_region.w32 = gf_w32_composite_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_composite_multiply_region) } if (cd->log == NULL) { - gf->multiply.w32 = gf_w32_composite_multiply_recursive; + SET_FUNCTION(gf,multiply,w32,gf_w32_composite_multiply_recursive) } else { - gf->multiply.w32 = gf_w32_composite_multiply_inline; + SET_FUNCTION(gf,multiply,w32,gf_w32_composite_multiply_inline) } - gf->divide.w32 = NULL; - gf->inverse.w32 = gf_w32_composite_inverse; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,gf_w32_composite_inverse) return 1; } @@ -2776,10 +2776,10 @@ int gf_w32_init(gf_t *gf) if(h->mult_type != GF_MULT_COMPOSITE) h->prim_poly &= 0xffffffff; - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - 
gf->multiply_region.w32 = NULL; + SET_FUNCTION(gf,multiply,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,NULL) switch(h->mult_type) { case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break; @@ -2794,30 +2794,30 @@ int gf_w32_init(gf_t *gf) default: return 0; } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_w32_divide_from_inverse; - gf->inverse.w32 = gf_w32_euclid; + SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w32_euclid) } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_w32_divide_from_inverse; - gf->inverse.w32 = gf_w32_matrix; + SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w32_matrix) } if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { - gf->divide.w32 = gf_w32_divide_from_inverse; + SET_FUNCTION(gf,divide,w32,gf_w32_divide_from_inverse) } if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { - gf->inverse.w32 = gf_w32_inverse_from_divide; + SET_FUNCTION(gf,inverse,w32,gf_w32_inverse_from_divide) } if (h->region_type == GF_REGION_CAUCHY) { - gf->extract_word.w32 = gf_wgen_extract_word; - gf->multiply_region.w32 = gf_wgen_cauchy_region; + SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word) + SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region) } else if (h->region_type & GF_REGION_ALTMAP) { if (h->mult_type == GF_MULT_COMPOSITE) { - gf->extract_word.w32 = gf_w32_composite_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w32_composite_extract_word) } else { - gf->extract_word.w32 = gf_w32_split_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w32_split_extract_word) } } else { - gf->extract_word.w32 = gf_w32_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w32_extract_word) } return 1; } diff --git a/src/gf_w4.c b/src/gf_w4.c index 0e86aa8..814b0f5 100644 --- a/src/gf_w4.c +++ b/src/gf_w4.c @@ -311,10 +311,10 @@ int 
gf_w4_log_init(gf_t *gf) return 0; } - gf->inverse.w32 = gf_w4_inverse_from_divide; - gf->divide.w32 = gf_w4_log_divide; - gf->multiply.w32 = gf_w4_log_multiply; - gf->multiply_region.w32 = gf_w4_log_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w4_inverse_from_divide) + SET_FUNCTION(gf,divide,w32,gf_w4_log_divide) + SET_FUNCTION(gf,multiply,w32,gf_w4_log_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w4_log_multiply_region) return 1; } @@ -444,20 +444,20 @@ int gf_w4_single_table_init(gf_t *gf) } } - gf->inverse.w32 = NULL; - gf->divide.w32 = gf_w4_single_table_divide; - gf->multiply.w32 = gf_w4_single_table_multiply; + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,divide,w32,gf_w4_single_table_divide) + SET_FUNCTION(gf,multiply,w32,gf_w4_single_table_multiply) #if defined(INTEL_SSSE3) || defined(ARM_NEON) if(h->region_type & (GF_REGION_NOSIMD | GF_REGION_CAUCHY)) - gf->multiply_region.w32 = gf_w4_single_table_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_multiply_region) else #if defined(INTEL_SSSE3) - gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_sse_multiply_region) #elif defined(ARM_NEON) gf_w4_neon_single_table_init(gf); #endif #else - gf->multiply_region.w32 = gf_w4_single_table_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_multiply_region) if (h->region_type & GF_REGION_SIMD) return 0; #endif @@ -548,10 +548,10 @@ int gf_w4_double_table_init(gf_t *gf) } } - gf->inverse.w32 = NULL; - gf->divide.w32 = gf_w4_double_table_divide; - gf->multiply.w32 = gf_w4_double_table_multiply; - gf->multiply_region.w32 = gf_w4_double_table_multiply_region; + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,divide,w32,gf_w4_double_table_divide) + SET_FUNCTION(gf,multiply,w32,gf_w4_double_table_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w4_double_table_multiply_region) return 1; } @@ -682,10 +682,10 @@ int 
gf_w4_quad_table_init(gf_t *gf) } } - gf->inverse.w32 = NULL; - gf->divide.w32 = gf_w4_quad_table_divide; - gf->multiply.w32 = gf_w4_quad_table_multiply; - gf->multiply_region.w32 = gf_w4_quad_table_multiply_region; + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,divide,w32,gf_w4_quad_table_divide) + SET_FUNCTION(gf,multiply,w32,gf_w4_quad_table_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w4_quad_table_multiply_region) return 1; } static @@ -724,10 +724,10 @@ int gf_w4_quad_table_lazy_init(gf_t *gf) } } - gf->inverse.w32 = NULL; - gf->divide.w32 = gf_w4_quad_table_lazy_divide; - gf->multiply.w32 = gf_w4_quad_table_lazy_multiply; - gf->multiply_region.w32 = gf_w4_quad_table_multiply_region; + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,divide,w32,gf_w4_quad_table_lazy_divide) + SET_FUNCTION(gf,multiply,w32,gf_w4_quad_table_lazy_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w4_quad_table_multiply_region) return 1; } @@ -1865,26 +1865,26 @@ int gf_w4_bytwo_init(gf_t *gf) } if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w32 = gf_w4_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w4_bytwo_p_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_p_nosse_multiply_region) else - gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_p_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_p_nosse_multiply_region) if (h->region_type & GF_REGION_SIMD) return 0; #endif } else { - gf->multiply.w32 = gf_w4_bytwo_b_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w4_bytwo_b_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_b_nosse_multiply_region) else 
- gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_b_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_b_nosse_multiply_region) if (h->region_type & GF_REGION_SIMD) return 0; #endif @@ -1897,7 +1897,7 @@ static int gf_w4_cfm_init(gf_t *gf) { #if defined(INTEL_SSE4_PCLMUL) - gf->multiply.w32 = gf_w4_clm_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w4_clm_multiply) return 1; #elif defined(ARM_NEON) return gf_w4_neon_cfm_init(gf); @@ -1908,7 +1908,7 @@ int gf_w4_cfm_init(gf_t *gf) static int gf_w4_shift_init(gf_t *gf) { - gf->multiply.w32 = gf_w4_shift_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w4_shift_multiply) return 1; } @@ -1977,11 +1977,11 @@ gf_w4_init (gf_t *gf) h = (gf_internal_t *) gf->scratch; if (h->prim_poly == 0) h->prim_poly = 0x13; h->prim_poly |= 0x10; - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = NULL; - gf->extract_word.w32 = gf_w4_extract_word; + SET_FUNCTION(gf,multiply,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,NULL) + SET_FUNCTION(gf,extract_word,w32,gf_w4_extract_word) switch(h->mult_type) { case GF_MULT_CARRY_FREE: if (gf_w4_cfm_init(gf) == 0) return 0; break; @@ -1995,27 +1995,27 @@ gf_w4_init (gf_t *gf) } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_w4_divide_from_inverse; - gf->inverse.w32 = gf_w4_euclid; + SET_FUNCTION(gf,divide,w32,gf_w4_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w4_euclid) } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_w4_divide_from_inverse; - gf->inverse.w32 = gf_w4_matrix; + SET_FUNCTION(gf,divide,w32,gf_w4_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w4_matrix) } if (gf->divide.w32 == NULL) { - gf->divide.w32 = gf_w4_divide_from_inverse; - if (gf->inverse.w32 == NULL) 
gf->inverse.w32 = gf_w4_euclid; + SET_FUNCTION(gf,divide,w32,gf_w4_divide_from_inverse) + if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w4_euclid) } - if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w4_inverse_from_divide; + if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w4_inverse_from_divide) if (h->region_type == GF_REGION_CAUCHY) { - gf->multiply_region.w32 = gf_wgen_cauchy_region; - gf->extract_word.w32 = gf_wgen_extract_word; + SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region) + SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word) } if (gf->multiply_region.w32 == NULL) { - gf->multiply_region.w32 = gf_w4_multiply_region_from_single; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_multiply_region_from_single) } return 1; diff --git a/src/gf_w64.c b/src/gf_w64.c index eae31e6..a096161 100644 --- a/src/gf_w64.c +++ b/src/gf_w64.c @@ -697,17 +697,17 @@ gf_w64_split_16_64_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint64_ static int gf_w64_shift_init(gf_t *gf) { - gf->multiply.w64 = gf_w64_shift_multiply; - gf->inverse.w64 = gf_w64_euclid; - gf->multiply_region.w64 = gf_w64_multiply_region_from_single; + SET_FUNCTION(gf,multiply,w64,gf_w64_shift_multiply) + SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_multiply_region_from_single) return 1; } static int gf_w64_cfm_init(gf_t *gf) { - gf->inverse.w64 = gf_w64_euclid; - gf->multiply_region.w64 = gf_w64_multiply_region_from_single; + SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_multiply_region_from_single) #if defined(INTEL_SSE4_PCLMUL) gf_internal_t *h; @@ -715,11 +715,11 @@ int gf_w64_cfm_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; if ((0xfffffffe00000000ULL & h->prim_poly) == 0){ - gf->multiply.w64 = gf_w64_clm_multiply_2; - gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2; + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_2) + 
SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_2) }else if((0xfffe000000000000ULL & h->prim_poly) == 0){ - gf->multiply.w64 = gf_w64_clm_multiply_4; - gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_4; + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_4) } else { return 0; } @@ -1008,14 +1008,14 @@ int gf_w64_group_init(gf_t *gf) } if (g_s == g_r) { - gf->multiply.w64 = gf_w64_group_s_equals_r_multiply; - gf->multiply_region.w64 = gf_w64_group_s_equals_r_multiply_region; + SET_FUNCTION(gf,multiply,w64,gf_w64_group_s_equals_r_multiply) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_group_s_equals_r_multiply_region) } else { - gf->multiply.w64 = gf_w64_group_multiply; - gf->multiply_region.w64 = gf_w64_group_multiply_region; + SET_FUNCTION(gf,multiply,w64,gf_w64_group_multiply) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_group_multiply_region) } - gf->divide.w64 = NULL; - gf->inverse.w64 = gf_w64_euclid; + SET_FUNCTION(gf,divide,w64,NULL) + SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) return 1; } @@ -1455,31 +1455,31 @@ int gf_w64_bytwo_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w64 = gf_w64_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w64,gf_w64_bytwo_p_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w64 = gf_w64_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_p_nosse_multiply_region) else - gf->multiply_region.w64 = gf_w64_bytwo_p_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_p_sse_multiply_region) #else - gf->multiply_region.w64 = gf_w64_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_p_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { - gf->multiply.w64 = gf_w64_bytwo_b_multiply; + 
SET_FUNCTION(gf,multiply,w64,gf_w64_bytwo_b_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w64 = gf_w64_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_b_nosse_multiply_region) else - gf->multiply_region.w64 = gf_w64_bytwo_b_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_b_sse_multiply_region) #else - gf->multiply_region.w64 = gf_w64_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif } - gf->inverse.w64 = gf_w64_euclid; + SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) return 1; } @@ -1653,14 +1653,14 @@ int gf_w64_composite_init(gf_t *gf) gf_internal_t *h = (gf_internal_t *) gf->scratch; if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w64 = gf_w64_composite_multiply_region_alt; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_composite_multiply_region_alt) } else { - gf->multiply_region.w64 = gf_w64_composite_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_composite_multiply_region) } - gf->multiply.w64 = gf_w64_composite_multiply; - gf->divide.w64 = NULL; - gf->inverse.w64 = gf_w64_composite_inverse; + SET_FUNCTION(gf,multiply,w64,gf_w64_composite_multiply) + SET_FUNCTION(gf,divide,w64,NULL) + SET_FUNCTION(gf,inverse,w64,gf_w64_composite_inverse) return 1; } @@ -1970,9 +1970,9 @@ int gf_w64_split_init(gf_t *gf) /* Defaults */ - gf->multiply_region.w64 = gf_w64_multiply_region_from_single; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_multiply_region_from_single) - gf->multiply.w64 = gf_w64_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w64,gf_w64_bytwo_p_multiply) #if defined(INTEL_SSE4_PCLMUL) if ((!(h->region_type & GF_REGION_NOSIMD) && @@ -1980,18 +1980,18 @@ int gf_w64_split_init(gf_t *gf) h->mult_type == GF_MULT_DEFAULT){ if ((0xfffffffe00000000ULL & h->prim_poly) == 0){ - gf->multiply.w64 = gf_w64_clm_multiply_2; - 
gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2; + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_2) }else if((0xfffe000000000000ULL & h->prim_poly) == 0){ - gf->multiply.w64 = gf_w64_clm_multiply_4; - gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_4; + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_4) }else{ return 0; } } #endif - gf->inverse.w64 = gf_w64_euclid; + SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) /* Allen: set region pointers for default mult type. Single pointers are * taken care of above (explicitly for sse, implicitly for no sse). */ @@ -2001,7 +2001,7 @@ int gf_w64_split_init(gf_t *gf) d4 = (struct gf_split_4_64_lazy_data *) h->private; d4->last_value = 0; #if defined(INTEL_SSE4) - gf->multiply_region.w64 = gf_w64_split_4_64_lazy_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_multiply_region) #elif defined(ARCH_AARCH64) gf_w64_neon_split_init(gf); #endif @@ -2010,7 +2010,7 @@ int gf_w64_split_init(gf_t *gf) if (h->mult_type == GF_MULT_DEFAULT) { d8 = (struct gf_split_8_64_lazy_data *) h->private; d8->last_value = 0; - gf->multiply_region.w64 = gf_w64_split_8_64_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_8_64_lazy_multiply_region) } #endif @@ -2022,7 +2022,7 @@ int gf_w64_split_init(gf_t *gf) if(h->region_type & GF_REGION_ALTMAP) { #ifdef INTEL_SSSE3 - gf->multiply_region.w64 = gf_w64_split_4_64_lazy_sse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_altmap_multiply_region) #elif defined(ARCH_AARCH64) gf_w64_neon_split_init(gf); #else @@ -2033,15 +2033,15 @@ int gf_w64_split_init(gf_t *gf) { #if defined(INTEL_SSE4) || defined(ARCH_AARCH64) if(h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w64 = gf_w64_split_4_64_lazy_multiply_region; 
+ SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_multiply_region) else #if defined(INTEL_SSE4) - gf->multiply_region.w64 = gf_w64_split_4_64_lazy_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_multiply_region) #elif defined(ARCH_AARCH64) gf_w64_neon_split_init(gf); #endif #else - gf->multiply_region.w64 = gf_w64_split_4_64_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif @@ -2050,16 +2050,16 @@ int gf_w64_split_init(gf_t *gf) if ((h->arg1 == 8 && h->arg2 == 64) || (h->arg1 == 64 && h->arg2 == 8)) { d8 = (struct gf_split_8_64_lazy_data *) h->private; d8->last_value = 0; - gf->multiply_region.w64 = gf_w64_split_8_64_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_8_64_lazy_multiply_region) } if ((h->arg1 == 16 && h->arg2 == 64) || (h->arg1 == 64 && h->arg2 == 16)) { d16 = (struct gf_split_16_64_lazy_data *) h->private; d16->last_value = 0; - gf->multiply_region.w64 = gf_w64_split_16_64_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_16_64_lazy_multiply_region) } if ((h->arg1 == 8 && h->arg2 == 8)) { d88 = (struct gf_split_8_8_data *) h->private; - gf->multiply.w64 = gf_w64_split_8_8_multiply; + SET_FUNCTION(gf,multiply,w64,gf_w64_split_8_8_multiply) /* The performance of this guy sucks, so don't bother with a region op */ @@ -2169,10 +2169,10 @@ int gf_w64_init(gf_t *gf) } } - gf->multiply.w64 = NULL; - gf->divide.w64 = NULL; - gf->inverse.w64 = NULL; - gf->multiply_region.w64 = NULL; + SET_FUNCTION(gf,multiply,w64,NULL) + SET_FUNCTION(gf,divide,w64,NULL) + SET_FUNCTION(gf,inverse,w64,NULL) + SET_FUNCTION(gf,multiply_region,w64,NULL) switch(h->mult_type) { case GF_MULT_CARRY_FREE: if (gf_w64_cfm_init(gf) == 0) return 0; break; @@ -2186,27 +2186,27 @@ int gf_w64_init(gf_t *gf) default: return 0; } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w64 = 
gf_w64_divide_from_inverse; - gf->inverse.w64 = gf_w64_euclid; + SET_FUNCTION(gf,divide,w64,gf_w64_divide_from_inverse) + SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) } if (gf->inverse.w64 != NULL && gf->divide.w64 == NULL) { - gf->divide.w64 = gf_w64_divide_from_inverse; + SET_FUNCTION(gf,divide,w64,gf_w64_divide_from_inverse) } if (gf->inverse.w64 == NULL && gf->divide.w64 != NULL) { - gf->inverse.w64 = gf_w64_inverse_from_divide; + SET_FUNCTION(gf,inverse,w64,gf_w64_inverse_from_divide) } if (h->region_type == GF_REGION_CAUCHY) return 0; if (h->region_type & GF_REGION_ALTMAP) { if (h->mult_type == GF_MULT_COMPOSITE) { - gf->extract_word.w64 = gf_w64_composite_extract_word; + SET_FUNCTION(gf,extract_word,w64,gf_w64_composite_extract_word) } else if (h->mult_type == GF_MULT_SPLIT_TABLE) { - gf->extract_word.w64 = gf_w64_split_extract_word; + SET_FUNCTION(gf,extract_word,w64,gf_w64_split_extract_word) } } else { - gf->extract_word.w64 = gf_w64_extract_word; + SET_FUNCTION(gf,extract_word,w64,gf_w64_extract_word) } return 1; diff --git a/src/gf_w8.c b/src/gf_w8.c index 276799f..81a0eba 100644 --- a/src/gf_w8.c +++ b/src/gf_w8.c @@ -514,14 +514,14 @@ int gf_w8_cfm_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; if ((0xe0 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w8_clm_multiply_2; - gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2; + SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_2) }else if ((0xc0 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w8_clm_multiply_3; - gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_3; + SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_3) }else if ((0x80 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w8_clm_multiply_4; - gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_4; + SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_4) + 
SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_4) }else{ return 0; } @@ -537,7 +537,7 @@ int gf_w8_cfm_init(gf_t *gf) static int gf_w8_shift_init(gf_t *gf) { - gf->multiply.w32 = gf_w8_shift_multiply; /* The others will be set automatically */ + SET_FUNCTION(gf,multiply,w32,gf_w8_shift_multiply) /* The others will be set automatically */ return 1; } @@ -809,20 +809,20 @@ int gf_w8_log_init(gf_t *gf) } while (i != 1); if (h->mult_type == GF_MULT_LOG_TABLE) { - gf->inverse.w32 = gf_w8_log_inverse; - gf->divide.w32 = gf_w8_log_divide; - gf->multiply.w32 = gf_w8_log_multiply; - gf->multiply_region.w32 = gf_w8_log_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w8_log_inverse) + SET_FUNCTION(gf,divide,w32,gf_w8_log_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_log_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_log_multiply_region) } else if (h->mult_type == GF_MULT_LOG_ZERO) { - gf->inverse.w32 = gf_w8_logzero_small_inverse; - gf->divide.w32 = gf_w8_logzero_small_divide; - gf->multiply.w32 = gf_w8_logzero_small_multiply; - gf->multiply_region.w32 = gf_w8_logzero_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w8_logzero_small_inverse) + SET_FUNCTION(gf,divide,w32,gf_w8_logzero_small_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_logzero_small_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_logzero_multiply_region) } else { - gf->inverse.w32 = gf_w8_logzero_inverse; - gf->divide.w32 = gf_w8_logzero_divide; - gf->multiply.w32 = gf_w8_logzero_multiply; - gf->multiply_region.w32 = gf_w8_logzero_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w8_logzero_inverse) + SET_FUNCTION(gf,divide,w32,gf_w8_logzero_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_logzero_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_logzero_multiply_region) } return 1; } @@ -1102,19 +1102,19 @@ int gf_w8_split_init(gf_t *gf) } } - gf->multiply.w32 = gf_w8_split_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w8_split_multiply) #if defined(INTEL_SSSE3) || 
defined(ARM_NEON) if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w8_split_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region) else #if defined(INTEL_SSSE3) - gf->multiply_region.w32 = gf_w8_split_multiply_region_sse; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region_sse) #elif defined(ARM_NEON) gf_w8_neon_split_init(gf); #endif #else - gf->multiply_region.w32 = gf_w8_split_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif @@ -1201,29 +1201,29 @@ int gf_w8_table_init(gf_t *gf) } } - gf->inverse.w32 = NULL; /* Will set from divide */ + SET_FUNCTION(gf,inverse,w32,NULL) /* Will set from divide */ switch (scase) { case 0: - gf->divide.w32 = gf_w8_table_divide; - gf->multiply.w32 = gf_w8_table_multiply; - gf->multiply_region.w32 = gf_w8_table_multiply_region; + SET_FUNCTION(gf,divide,w32,gf_w8_table_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_table_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_table_multiply_region) break; case 1: - gf->divide.w32 = gf_w8_double_table_divide; - gf->multiply.w32 = gf_w8_double_table_multiply; - gf->multiply_region.w32 = gf_w8_double_table_multiply_region; + SET_FUNCTION(gf,divide,w32,gf_w8_double_table_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_double_table_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_double_table_multiply_region) break; case 2: - gf->divide.w32 = gf_w8_double_table_lazy_divide; - gf->multiply.w32 = gf_w8_double_table_lazy_multiply; - gf->multiply_region.w32 = gf_w8_double_table_multiply_region; + SET_FUNCTION(gf,divide,w32,gf_w8_double_table_lazy_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_double_table_lazy_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_double_table_multiply_region) break; case 3: #if defined(INTEL_SSSE3) || defined(ARM_NEON) - gf->divide.w32 = gf_w8_default_divide; - gf->multiply.w32 = gf_w8_default_multiply; + 
SET_FUNCTION(gf,divide,w32,gf_w8_default_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_default_multiply) #if defined(INTEL_SSSE3) - gf->multiply_region.w32 = gf_w8_split_multiply_region_sse; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region_sse) #elif defined(ARM_NEON) gf_w8_neon_split_init(gf); #endif @@ -1472,18 +1472,18 @@ int gf_w8_composite_init(gf_t *gf) cd->mult_table = gf_w4_get_mult_table(h->base_gf); if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w8_composite_multiply_region_alt; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_composite_multiply_region_alt) } else { - gf->multiply_region.w32 = gf_w8_composite_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_composite_multiply_region) } if (cd->mult_table == NULL) { - gf->multiply.w32 = gf_w8_composite_multiply_recursive; + SET_FUNCTION(gf,multiply,w32,gf_w8_composite_multiply_recursive) } else { - gf->multiply.w32 = gf_w8_composite_multiply_inline; + SET_FUNCTION(gf,multiply,w32,gf_w8_composite_multiply_inline) } - gf->divide.w32 = NULL; - gf->inverse.w32 = gf_w8_composite_inverse; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,gf_w8_composite_inverse) return 1; } @@ -2190,26 +2190,26 @@ int gf_w8_bytwo_init(gf_t *gf) } if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w32 = gf_w8_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w8_bytwo_p_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w8_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_p_nosse_multiply_region) else - gf->multiply_region.w32 = gf_w8_bytwo_p_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_p_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w8_bytwo_p_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_p_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif } else { - gf->multiply.w32 = gf_w8_bytwo_b_multiply; + 
SET_FUNCTION(gf,multiply,w32,gf_w8_bytwo_b_multiply) #ifdef INTEL_SSE2 if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w8_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_b_nosse_multiply_region) else - gf->multiply_region.w32 = gf_w8_bytwo_b_sse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_b_sse_multiply_region) #else - gf->multiply_region.w32 = gf_w8_bytwo_b_nosse_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; #endif @@ -2304,11 +2304,11 @@ int gf_w8_init(gf_t *gf) h->prim_poly |= 0x100; } - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = NULL; - gf->extract_word.w32 = gf_w8_extract_word; + SET_FUNCTION(gf,multiply,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,NULL) + SET_FUNCTION(gf,extract_word,w32,gf_w8_extract_word) switch(h->mult_type) { case GF_MULT_DEFAULT: @@ -2326,31 +2326,31 @@ int gf_w8_init(gf_t *gf) } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_w8_divide_from_inverse; - gf->inverse.w32 = gf_w8_euclid; + SET_FUNCTION(gf,divide,w32,gf_w8_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w8_euclid) } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_w8_divide_from_inverse; - gf->inverse.w32 = gf_w8_matrix; + SET_FUNCTION(gf,divide,w32,gf_w8_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w8_matrix) } if (gf->divide.w32 == NULL) { - gf->divide.w32 = gf_w8_divide_from_inverse; - if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w8_euclid; + SET_FUNCTION(gf,divide,w32,gf_w8_divide_from_inverse) + if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w8_euclid) } - if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w8_inverse_from_divide; + if (gf->inverse.w32 == NULL) 
SET_FUNCTION(gf,inverse,w32,gf_w8_inverse_from_divide) if (h->mult_type == GF_MULT_COMPOSITE && (h->region_type & GF_REGION_ALTMAP)) { - gf->extract_word.w32 = gf_w8_composite_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w8_composite_extract_word) } if (h->region_type == GF_REGION_CAUCHY) { - gf->multiply_region.w32 = gf_wgen_cauchy_region; - gf->extract_word.w32 = gf_wgen_extract_word; + SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region) + SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word) } if (gf->multiply_region.w32 == NULL) { - gf->multiply_region.w32 = gf_w8_multiply_region_from_single; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_multiply_region_from_single) } return 1; diff --git a/src/gf_wgen.c b/src/gf_wgen.c index ebc50a5..1e3d2e0 100644 --- a/src/gf_wgen.c +++ b/src/gf_wgen.c @@ -178,8 +178,8 @@ gf_wgen_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32) static int gf_wgen_shift_init(gf_t *gf) { - gf->multiply.w32 = gf_wgen_shift_multiply; - gf->inverse.w32 = gf_wgen_euclid; + SET_FUNCTION(gf,multiply,w32,gf_wgen_shift_multiply) + SET_FUNCTION(gf,inverse,w32,gf_wgen_euclid) return 1; } @@ -211,8 +211,8 @@ gf_wgen_bytwo_b_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) static int gf_wgen_bytwo_b_init(gf_t *gf) { - gf->multiply.w32 = gf_wgen_bytwo_b_multiply; - gf->inverse.w32 = gf_wgen_euclid; + SET_FUNCTION(gf,multiply,w32,gf_wgen_bytwo_b_multiply) + SET_FUNCTION(gf,inverse,w32,gf_wgen_euclid) return 1; } @@ -247,8 +247,8 @@ gf_wgen_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) static int gf_wgen_bytwo_p_init(gf_t *gf) { - gf->multiply.w32 = gf_wgen_bytwo_p_multiply; - gf->inverse.w32 = gf_wgen_euclid; + SET_FUNCTION(gf,multiply,w32,gf_wgen_bytwo_p_multiply) + SET_FUNCTION(gf,inverse,w32,gf_wgen_euclid) return 1; } @@ -453,12 +453,12 @@ int gf_wgen_group_init(gf_t *gf) } if (g_s == g_r) { - gf->multiply.w32 = gf_wgen_group_s_equals_r_multiply; + SET_FUNCTION(gf,multiply,w32,gf_wgen_group_s_equals_r_multiply) } else { - 
gf->multiply.w32 = gf_wgen_group_multiply; + SET_FUNCTION(gf,multiply,w32,gf_wgen_group_multiply) } - gf->divide.w32 = NULL; - gf->divide.w32 = NULL; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) return 1; } @@ -519,8 +519,8 @@ int gf_wgen_table_8_init(gf_t *gf) } } - gf->multiply.w32 = gf_wgen_table_8_multiply; - gf->divide.w32 = gf_wgen_table_8_divide; + SET_FUNCTION(gf,multiply,w32,gf_wgen_table_8_multiply) + SET_FUNCTION(gf,divide,w32,gf_wgen_table_8_divide) return 1; } @@ -580,8 +580,8 @@ int gf_wgen_table_16_init(gf_t *gf) } } - gf->multiply.w32 = gf_wgen_table_16_multiply; - gf->divide.w32 = gf_wgen_table_16_divide; + SET_FUNCTION(gf,multiply,w32,gf_wgen_table_16_multiply) + SET_FUNCTION(gf,divide,w32,gf_wgen_table_16_divide) return 1; } @@ -670,8 +670,8 @@ int gf_wgen_log_8_init(gf_t *gf) return 0; } - gf->multiply.w32 = gf_wgen_log_8_multiply; - gf->divide.w32 = gf_wgen_log_8_divide; + SET_FUNCTION(gf,multiply,w32,gf_wgen_log_8_multiply) + SET_FUNCTION(gf,divide,w32,gf_wgen_log_8_divide) return 1; } @@ -746,8 +746,8 @@ int gf_wgen_log_16_init(gf_t *gf) return 0; } - gf->multiply.w32 = gf_wgen_log_16_multiply; - gf->divide.w32 = gf_wgen_log_16_divide; + SET_FUNCTION(gf,multiply,w32,gf_wgen_log_16_multiply) + SET_FUNCTION(gf,divide,w32,gf_wgen_log_16_divide) return 1; } @@ -821,8 +821,8 @@ int gf_wgen_log_32_init(gf_t *gf) return 0; } - gf->multiply.w32 = gf_wgen_log_32_multiply; - gf->divide.w32 = gf_wgen_log_32_divide; + SET_FUNCTION(gf,multiply,w32,gf_wgen_log_32_multiply) + SET_FUNCTION(gf,divide,w32,gf_wgen_log_32_divide) return 1; } @@ -975,11 +975,11 @@ int gf_wgen_init(gf_t *gf) } } - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = gf_wgen_cauchy_region; - gf->extract_word.w32 = gf_wgen_extract_word; + SET_FUNCTION(gf,multiply,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region) + 
SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word) switch(h->mult_type) { case GF_MULT_DEFAULT: @@ -1000,20 +1000,20 @@ int gf_wgen_init(gf_t *gf) default: return 0; } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_wgen_divide_from_inverse; - gf->inverse.w32 = gf_wgen_euclid; + SET_FUNCTION(gf,divide,w32,gf_wgen_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_wgen_euclid) } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_wgen_divide_from_inverse; - gf->inverse.w32 = gf_wgen_matrix; + SET_FUNCTION(gf,divide,w32,gf_wgen_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_wgen_matrix) } - if (gf->inverse.w32== NULL && gf->divide.w32 == NULL) gf->inverse.w32 = gf_wgen_euclid; + if (gf->inverse.w32== NULL && gf->divide.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_wgen_euclid) if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { - gf->divide.w32 = gf_wgen_divide_from_inverse; + SET_FUNCTION(gf,divide,w32,gf_wgen_divide_from_inverse) } if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { - gf->inverse.w32 = gf_wgen_inverse_from_divide; + SET_FUNCTION(gf,inverse,w32,gf_wgen_inverse_from_divide) } return 1; } diff --git a/src/neon/gf_w16_neon.c b/src/neon/gf_w16_neon.c index 2bd3f30..477ee63 100644 --- a/src/neon/gf_w16_neon.c +++ b/src/neon/gf_w16_neon.c @@ -270,7 +270,7 @@ void gf_w16_neon_split_init(gf_t *gf) gf_internal_t *h = (gf_internal_t *) gf->scratch; if (h->region_type & GF_REGION_ALTMAP) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_altmap_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_altmap_multiply_region_neon) else - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region_neon) } diff --git a/src/neon/gf_w32_neon.c b/src/neon/gf_w32_neon.c index 8231eb3..7fd1329 100644 --- a/src/neon/gf_w32_neon.c +++ b/src/neon/gf_w32_neon.c @@ -262,8 +262,8 @@ void 
gf_w32_neon_split_init(gf_t *gf) gf_internal_t *h = (gf_internal_t *) gf->scratch; if (h->region_type & GF_REGION_ALTMAP) - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_altmap_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_altmap_multiply_region_neon) else - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_multiply_region_neon) } diff --git a/src/neon/gf_w4_neon.c b/src/neon/gf_w4_neon.c index 3a21432..5f35c86 100644 --- a/src/neon/gf_w4_neon.c +++ b/src/neon/gf_w4_neon.c @@ -235,13 +235,13 @@ gf_w4_single_table_multiply_region_neon(gf_t *gf, void *src, void *dest, int gf_w4_neon_cfm_init(gf_t *gf) { // single clm multiplication probably pointless - gf->multiply.w32 = gf_w4_neon_clm_multiply; - gf->multiply_region.w32 = gf_w4_neon_clm_multiply_region_from_single; + SET_FUNCTION(gf,multiply,w32,gf_w4_neon_clm_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w4_neon_clm_multiply_region_from_single) return 1; } void gf_w4_neon_single_table_init(gf_t *gf) { - gf->multiply_region.w32 = gf_w4_single_table_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_multiply_region_neon) } diff --git a/src/neon/gf_w64_neon.c b/src/neon/gf_w64_neon.c index 0eca9c7..2409823 100644 --- a/src/neon/gf_w64_neon.c +++ b/src/neon/gf_w64_neon.c @@ -326,8 +326,8 @@ void gf_w64_neon_split_init(gf_t *gf) gf_internal_t *h = (gf_internal_t *) gf->scratch; if (h->region_type & GF_REGION_ALTMAP) - gf->multiply_region.w64 = gf_w64_split_4_64_lazy_altmap_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_altmap_multiply_region_neon) else - gf->multiply_region.w64 = gf_w64_split_4_64_lazy_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_multiply_region_neon) } diff --git a/src/neon/gf_w8_neon.c b/src/neon/gf_w8_neon.c index 930a916..0cce5ba 100644 --- a/src/neon/gf_w8_neon.c +++ 
b/src/neon/gf_w8_neon.c @@ -188,14 +188,14 @@ int gf_w8_neon_cfm_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; if ((0xe0 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w8_neon_clm_multiply_2; - gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_2; + SET_FUNCTION(gf,multiply,w32,gf_w8_neon_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_neon_clm_multiply_region_from_single_2) }else if ((0xc0 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w8_neon_clm_multiply_3; - gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_3; + SET_FUNCTION(gf,multiply,w32,gf_w8_neon_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_neon_clm_multiply_region_from_single_3) }else if ((0x80 & h->prim_poly) == 0){ - gf->multiply.w32 = gf_w8_neon_clm_multiply_4; - gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_4; + SET_FUNCTION(gf,multiply,w32,gf_w8_neon_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_neon_clm_multiply_region_from_single_4) }else{ return 0; } @@ -298,5 +298,5 @@ gf_w8_split_multiply_region_neon(gf_t *gf, void *src, void *dest, gf_val_32_t va void gf_w8_neon_split_init(gf_t *gf) { - gf->multiply_region.w32 = gf_w8_split_multiply_region_neon; + SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region_neon) } diff --git a/tools/gf_methods.c b/tools/gf_methods.c index c7d3d58..b016c33 100644 --- a/tools/gf_methods.c +++ b/tools/gf_methods.c @@ -39,7 +39,7 @@ static char *divides[NDIVS] = { "MATRIX", "EUCLID" }; void usage(char *s) { - fprintf(stderr, "usage: gf_methods w -BADC -LUMDRB\n"); + fprintf(stderr, "usage: gf_methods w -BADC -LXUMDRB\n"); fprintf(stderr, "\n"); fprintf(stderr, " w can be 1-32, 64, 128\n"); fprintf(stderr, "\n"); @@ -50,6 +50,7 @@ void usage(char *s) fprintf(stderr, " Combinations are fine.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -L Simply lists methods\n"); + fprintf(stderr, " -X List methods and functions selected (compile with DEBUG_FUNCTIONS)\n"); 
fprintf(stderr, " -U Produces calls to gf_unit\n"); fprintf(stderr, " -M Produces calls to time_tool.sh for single multiplications\n"); fprintf(stderr, " -D Produces calls to time_tool.sh for single divisions\n"); @@ -63,6 +64,19 @@ void usage(char *s) exit(1); } +void print_methods(gf_t *gf) +{ +#ifdef DEBUG_FUNCTIONS + gf_internal_t *h = (gf_internal_t*) gf->scratch; + + printf("multiply = %s\n", h->multiply); + printf("divide = %s\n", h->divide); + printf("inverse = %s\n", h->inverse); + printf("multiply_region = %s\n", h->multiply_region); + printf("extract_word = %s\n", h->extract_word); +#endif +} + int main(int argc, char *argv[]) { int m, r, d, w, i, sa, j, k, reset, ok; @@ -99,12 +113,12 @@ int main(int argc, char *argv[]) } } - if (strchr("LUMDRB", argv[3][1]) == NULL) { usage("Bad -LUMDRB"); } + if (strchr("LXUMDRB", argv[3][1]) == NULL) { usage("Bad -LXUMDRB"); } listing = argv[3][1]; if (listing == 'U') { w_str = "../test/gf_unit %d A -1"; - } else if (listing == 'L') { + } else if (listing == 'L' || listing == 'X') { w_str = "w=%d:"; } else { w_str = strdup("sh time_tool.sh X %d"); @@ -192,6 +206,8 @@ int main(int argc, char *argv[]) printf(w_str, w); for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); printf("\n"); + if (listing == 'X') + print_methods(&gf); gf_free(&gf, 1); } else if (_gf_errno == GF_E_DEFAULT) { fprintf(stderr, "Unlabeled failed method: w=%d:", w); @@ -212,6 +228,8 @@ int main(int argc, char *argv[]) printf(w_str, w); for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); printf("\n"); + if (listing == 'X') + print_methods(&gf); gf_free(&gf, 1); } else if (_gf_errno == GF_E_DEFAULT) { fprintf(stderr, "Unlabeled failed method: w=%d:", w); -- cgit v1.2.1 From 7761438c63e18f380979a3bf5647574243708abd Mon Sep 17 00:00:00 2001 From: Bassam Tabbara Date: Fri, 2 Sep 2016 17:23:36 -0700 Subject: Add SIMD test helpers This commit adds a couple of scripts that help test SIMD functionality on different machines through QEMU. 
tools/test_simd_qemu.sh will automatically start QEMU, run the tests and stop it. It uses the Ubuntu cloud images, which are built for x86_64, arm and arm64. tools/test_simd.sh runs a number of tests, including compiling with different flags, running the unit tests, and gathering the functions selected in gf_init (when compiled with DEBUG_FUNCTIONS). --- .gitignore | 3 +- tools/test_simd.sh | 125 ++++++++++++++++++++++++ tools/test_simd_qemu.sh | 254 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 381 insertions(+), 1 deletion(-) create mode 100755 tools/test_simd.sh create mode 100755 tools/test_simd_qemu.sh diff --git a/.gitignore b/.gitignore index f6f097d..22e6fbe 100644 --- a/.gitignore +++ b/.gitignore @@ -74,4 +74,5 @@ tools/gf_poly tools/gf_time tools/gf_unit_w* tools/test-suite.log - +tools/.qemu/ +tools/test_simd*.results diff --git a/tools/test_simd.sh b/tools/test_simd.sh new file mode 100755 index 0000000..1268f87 --- /dev/null +++ b/tools/test_simd.sh @@ -0,0 +1,125 @@ +#!/bin/bash -e + +# this scripts has a number of tests for SIMD. It can be invoked +# on the host or on a QEMU machine. 
+ +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +host_cpu=`uname -p` +results=${script_dir}/test_simd.results + +# runs unit tests and save the results +test_unit(){ + { ./configure && make clean && make; } || { echo "Compile FAILED" >> ${results}; return 1; } + make check || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + cat tools/test-suite.log >> ${results} || true +} + +# build with DEBUG_FUNCTIONS and save all methods selected +# to a results file +test_functions() { + failed=0 + + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${results}; return 1; } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + done + + return ${failed} +} + +compile_arm() { + failed=0 + + echo -n "Compiling with NO SIMD support..." >> ${results} + { ./configure --disable-neon && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with FULL SIMD support..." >> ${results} + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + return ${failed} +} + +compile_intel() { + failed=0 + + echo -n "Compiling with NO SIMD support..." >> ${results} + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=no + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3 only..." 
>> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3,SSSE3 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3,SSSE3,SSE4_1 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=yes + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3,SSSE3,SSE4_2 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=yes + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with FULL SIMD support..." 
>> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=yes + export ax_cv_have_sse42_ext=yes + export ax_cv_have_pclmuldq_ext=yes + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + return ${failed} +} + +# test that we can compile the source code with different +# SIMD options. We assume that we are running on processor +# full SIMD support +test_compile() { + case $host_cpu in + aarch64*|arm*) compile_arm ;; + i[[3456]]86*|x86_64*|amd64*) compile_intel ;; + esac +} + +cd ${script_dir}/.. +rm -f ${results} + +test_$1 +exit $? diff --git a/tools/test_simd_qemu.sh b/tools/test_simd_qemu.sh new file mode 100755 index 0000000..a270e20 --- /dev/null +++ b/tools/test_simd_qemu.sh @@ -0,0 +1,254 @@ +#!/bin/bash -e + +# This script will use QEMU to test gf-complete especially SIMD support +# on different architectures and cpus. It will boot a qemu machine +# and run an Ubuntu cloud image. All testing will happen inside the +# QEMU machine. 
+ +# The following packages are required: +# qemu-system-aarch64 +# qemu-system-arm +# qemu-system-x86_64 +# genisoimage + + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +qemu_dir="${script_dir}/.qemu" +ssh_port=2222 +ssh_pubkey_file="${qemu_dir}/qemu.pub" +ssh_key_file="${qemu_dir}/qemu" + +mkdir -p "${qemu_dir}" + +cleanup() { + if [[ -n "$(jobs -p)" ]]; then + echo killing qemu processes "$(jobs -p)" + kill $(jobs -p) + fi +} + +trap cleanup EXIT + +start_qemu() { + arch=$1 + cpu=$2 + + image_version="xenial" + image_url_base="http://cloud-images.ubuntu.com/${image_version}/current" + + case $arch in + i[[3456]]86*|x86_64*|amd64*) + image_kernel="${image_version}-server-cloudimg-amd64-vmlinuz-generic" + image_initrd="${image_version}-server-cloudimg-amd64-initrd-generic" + image_disk="${image_version}-server-cloudimg-amd64-disk1.img" + ;; + aarch64*) + image_kernel="${image_version}-server-cloudimg-arm64-vmlinuz-generic" + image_initrd="${image_version}-server-cloudimg-arm64-initrd-generic" + image_disk="${image_version}-server-cloudimg-arm64-disk1.img" + ;; + arm*) + image_kernel="${image_version}-server-cloudimg-armhf-vmlinuz-lpae" + image_initrd="${image_version}-server-cloudimg-armhf-initrd-generic-lpae" + image_disk="${image_version}-server-cloudimg-armhf-disk1.img" + ;; + *) die "Unsupported arch" ;; + esac + + [[ -f ${qemu_dir}/${image_kernel} ]] || wget -O ${qemu_dir}/${image_kernel} ${image_url_base}/unpacked/${image_kernel} + [[ -f ${qemu_dir}/${image_initrd} ]] || wget -O ${qemu_dir}/${image_initrd} ${image_url_base}/unpacked/${image_initrd} + [[ -f ${qemu_dir}/${image_disk} ]] || wget -O ${qemu_dir}/${image_disk} ${image_url_base}/${image_disk} + + #create a delta disk to keep the original image clean + delta_disk="${qemu_dir}/disk.img" + rm -f ${delta_disk} + qemu-img create -q -f qcow2 -b "${qemu_dir}/${image_disk}" ${delta_disk} + + # generate an ssh keys + [[ -f ${ssh_pubkey_file} ]] || ssh-keygen -q -N "" -f ${ssh_key_file} + 
+ # create a config disk to set the SSH keys + cat > "${qemu_dir}/meta-data" < "${qemu_dir}/user-data" < Date: Sat, 3 Sep 2016 08:43:13 -0700 Subject: Support for runtime SIMD detection This commit adds support for runtime detection of SIMD instructions. The idea is that you would build once with all supported SIMD functions and the same binaries could run on different machines with varying support for SIMD. At runtime gf-complete will select the right functions based on the processor. gf_cpu.c has the logic to detect SIMD instructions. On Intel processors this is done through cpuid. For ARM on Linux we read the ELF auxiliary vector (/proc/self/auxv). The logic in gf_w*.c has been changed to check for runtime SIMD support and fall back to generic code. Also a new test has been added. It compares the functions selected by gf_init when we enable/disable SIMD support through build flags with those selected when enabling/disabling it at runtime. The test checks that the results are identical. --- .gitignore | 2 +- include/gf_cpu.h | 20 +++++ src/Makefile.am | 16 +++- src/gf.c | 21 +++-- src/gf_cpu.c | 153 ++++++++++++++++++++++++++++++++ src/gf_w128.c | 48 +++++----- src/gf_w16.c | 127 +++++++++++++------------- src/gf_w32.c | 181 ++++++++++++++++++------------------- src/gf_w4.c | 92 +++++++++---------- src/gf_w64.c | 164 +++++++++++++++++++--------------- src/gf_w8.c | 130 ++++++++++++++------------- tools/test_simd.sh | 231 ++++++++++++++++++++++++++++++++++++++++++++++++ tools/test_simd_qemu.sh | 2 + 13 files changed, 810 insertions(+), 377 deletions(-) create mode 100644 include/gf_cpu.h create mode 100644 src/gf_cpu.c diff --git a/.gitignore b/.gitignore index 22e6fbe..bfc1dfc 100644 --- a/.gitignore +++ b/.gitignore @@ -75,4 +75,4 @@ tools/gf_time tools/gf_unit_w* tools/test-suite.log tools/.qemu/ -tools/test_simd*.results +tools/test_simd*.results* diff --git a/include/gf_cpu.h b/include/gf_cpu.h new file mode 100644 index 0000000..71c7227 --- /dev/null +++ b/include/gf_cpu.h @@ -0,0 +1,20 @@ +/* + * GF-Complete: A 
Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_cpu.h + * + * Identifies whether the CPU supports SIMD instructions at runtime. + */ + +#pragma once + +extern int gf_cpu_supports_intel_pclmul; +extern int gf_cpu_supports_intel_sse4; +extern int gf_cpu_supports_intel_ssse3; +extern int gf_cpu_supports_intel_sse3; +extern int gf_cpu_supports_intel_sse2; +extern int gf_cpu_supports_arm_neon; + +void gf_cpu_identify(void); diff --git a/src/Makefile.am b/src/Makefile.am index a3bd37a..cfc2a50 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -4,11 +4,21 @@ AUTOMAKE_OPTIONS = subdir-objects AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC -Wsign-compare +# avoid using SIMD_FLAGS for code that calls strcmp as new gcc +# versions will use SIMD for the strcmp implementation. Instead +# we create a static library just for gf_method that is not compiled +# with SIMD_FLAGS, this static library will get linked into gf_complete.so +noinst_LTLIBRARIES = libgf_util.la +libgf_util_la_SOURCES = gf_method.c +libgf_util_la_CFLAGS = -O3 -fPIC -Wsign-compare + +# we narrowly use SIMD_FLAGS for code that needs it lib_LTLIBRARIES = libgf_complete.la -libgf_complete_la_SOURCES = gf.c gf_method.c gf_wgen.c gf_w4.c gf_w8.c gf_w16.c gf_w32.c \ - gf_w64.c gf_w128.c gf_rand.c gf_general.c +libgf_complete_la_SOURCES = gf.c gf_wgen.c gf_w4.c gf_w8.c gf_w16.c gf_w32.c \ + gf_w64.c gf_w128.c gf_rand.c gf_general.c gf_cpu.c +libgf_complete_la_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC -Wsign-compare +libgf_complete_la_LIBADD = libgf_util.la if HAVE_NEON libgf_complete_la_SOURCES += neon/gf_w4_neon.c \ diff --git a/src/gf.c b/src/gf.c index b7a5c01..feeafdc 100644 --- a/src/gf.c +++ b/src/gf.c @@ -12,6 +12,7 @@ #include #include #include +#include "gf_cpu.h" int _gf_errno = GF_E_DEFAULT; @@ -207,20 
+208,28 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type, if (region_type & (~tmp)) { _gf_errno = GF_E_UNK_REG; return 0; } #ifdef INTEL_SSE2 - sse2 = 1; + if (gf_cpu_supports_intel_sse2) { + sse2 = 1; + } #endif #ifdef INTEL_SSSE3 - sse3 = 1; + if (gf_cpu_supports_intel_ssse3) { + sse3 = 1; + } #endif #ifdef INTEL_SSE4_PCLMUL - pclmul = 1; + if (gf_cpu_supports_intel_pclmul) { + pclmul = 1; + } #endif #ifdef ARM_NEON - pclmul = (w == 4 || w == 8); - sse3 = 1; + if (gf_cpu_supports_arm_neon) { + pclmul = (w == 4 || w == 8); + sse3 = 1; + } #endif @@ -473,6 +482,8 @@ int gf_init_hard(gf_t *gf, int w, int mult_type, int sz; gf_internal_t *h; + gf_cpu_identify(); + if (gf_error_check(w, mult_type, region_type, divide_type, arg1, arg2, prim_poly, base_gf) == 0) return 0; diff --git a/src/gf_cpu.c b/src/gf_cpu.c new file mode 100644 index 0000000..ee2f847 --- /dev/null +++ b/src/gf_cpu.c @@ -0,0 +1,153 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_cpu.h + * + * Identifies whether the CPU supports SIMD instructions at runtime. 
+ */ + +#include +#include + +int gf_cpu_identified = 0; + +int gf_cpu_supports_intel_pclmul = 0; +int gf_cpu_supports_intel_sse4 = 0; +int gf_cpu_supports_intel_ssse3 = 0; +int gf_cpu_supports_intel_sse3 = 0; +int gf_cpu_supports_intel_sse2 = 0; +int gf_cpu_supports_arm_neon = 0; + +#if defined(__x86_64__) + +void gf_cpu_identify(void) +{ + if (gf_cpu_identified) { + return; + } + + int op = 1, eax, ebx, ecx, edx; + + __asm__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op)); + +#if defined(INTEL_SSE4_PCLMUL) + if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) { + gf_cpu_supports_intel_pclmul = 1; +#ifdef DEBUG_CPU_DETECTION + printf("#gf_cpu_supports_intel_pclmul\n"); +#endif + } +#endif + +#if defined(INTEL_SSE4) + if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) { + gf_cpu_supports_intel_sse4 = 1; +#ifdef DEBUG_CPU_DETECTION + printf("#gf_cpu_supports_intel_sse4\n"); +#endif + } +#endif + +#if defined(INTEL_SSSE3) + if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) { + gf_cpu_supports_intel_ssse3 = 1; +#ifdef DEBUG_CPU_DETECTION + printf("#gf_cpu_supports_intel_ssse3\n"); +#endif + } +#endif + +#if defined(INTEL_SSE3) + if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) { + gf_cpu_supports_intel_sse3 = 1; +#ifdef DEBUG_CPU_DETECTION + printf("#gf_cpu_supports_intel_sse3\n"); +#endif + } +#endif + +#if defined(INTEL_SSE2) + if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) { + gf_cpu_supports_intel_sse2 = 1; +#ifdef DEBUG_CPU_DETECTION + printf("#gf_cpu_supports_intel_sse2\n"); +#endif + } +#endif + + gf_cpu_identified = 1; +} + +#elif defined(__arm__) || defined(__aarch64__) + +#ifdef __linux__ + +#include +#include +#include +#include +#include +#include + +unsigned long get_hwcap(unsigned long type) { + unsigned long hwcap = 0; + int fd = open("/proc/self/auxv", O_RDONLY); + if (fd > 0) { + Elf32_auxv_t auxv; + while (read(fd, &auxv, 
sizeof(Elf32_auxv_t))) { + if (auxv.a_type == type) { + hwcap = auxv.a_un.a_val; + break; + } + } + close(fd); + } + + return hwcap; +} + +#endif // linux + +void gf_cpu_identify(void) +{ + if (gf_cpu_identified) { + return; + } + +#if defined(ARM_NEON) + if (!getenv("GF_COMPLETE_DISABLE_NEON")) { +#if __linux__ && __arm__ + gf_cpu_supports_arm_neon = (get_hwcap(AT_HWCAP) & HWCAP_NEON) > 0; +#elif __aarch64__ + // ASIMD is supported on all aarch64 architectures + gf_cpu_supports_arm_neon = 1; +#else + // we assume that NEON is supported if the compiler supports + // NEON and we dont have a reliable way to detect runtime support. + gf_cpu_supports_arm_neon = 1; +#endif + +#ifdef DEBUG_CPU_DETECTION + if (gf_cpu_supports_arm_neon) { + printf("#gf_cpu_supports_arm_neon\n"); + } +#endif + } +#endif // defined(ARM_NEON) + + gf_cpu_identified = 1; +} + +#else // defined(__arm__) || defined(__aarch64__) + +int gf_cpu_identify(void) +{ + gf_cpu_identified = 1; + return 0; +} + +#endif diff --git a/src/gf_w128.c b/src/gf_w128.c index 5f650b3..74f72e8 100644 --- a/src/gf_w128.c +++ b/src/gf_w128.c @@ -11,6 +11,7 @@ #include "gf_int.h" #include #include +#include "gf_cpu.h" #define GF_FIELD_WIDTH (128) @@ -290,11 +291,11 @@ gf_w128_shift_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_12 return; } +#if defined(INTEL_SSE4_PCLMUL) + void gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { -#if defined(INTEL_SSE4_PCLMUL) - __m128i a,b; __m128i result0,result1; __m128i prim_poly; @@ -338,9 +339,8 @@ gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_ c128[0] = (uint64_t)_mm_extract_epi64(result1,1); c128[1] = (uint64_t)_mm_extract_epi64(result1,0); -#endif -return; } +#endif void gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) @@ -376,10 +376,10 @@ gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_ return; } +#if 
defined(INTEL_SSE4) void gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { -#if defined(INTEL_SSE4) int i; __m128i a, b, pp, prod, amask, u_middle_one; /*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/ @@ -427,16 +427,16 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_ } c128[0] = (uint64_t)_mm_extract_epi64(prod, 1); c128[1] = (uint64_t)_mm_extract_epi64(prod, 0); -#endif return; } +#endif /* Ben: This slow function implements sse instrutions for bytwo_b because why not */ +#if defined(INTEL_SSE4) void gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { -#if defined(INTEL_SSE4) __m128i a, b, lmask, hmask, pp, c, middle_one; gf_internal_t *h; uint64_t topbit, middlebit; @@ -471,8 +471,8 @@ gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_ if (middlebit) b = _mm_xor_si128(b, middle_one); if (topbit) b = _mm_xor_si128(b, pp); } -#endif } +#endif void gf_w128_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) @@ -1146,7 +1146,7 @@ gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, } /* a^-1 -> b */ - void +void gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128) { uint64_t e_i[2], e_im1[2], e_ip1[2]; @@ -1239,7 +1239,7 @@ gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128) return; } - void +void gf_w128_divide_from_inverse(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { uint64_t d[2]; @@ -1248,7 +1248,7 @@ gf_w128_divide_from_inverse(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val return; } - void +void gf_w128_inverse_from_divide(GFP gf, gf_val_128_t a128, gf_val_128_t b128) { uint64_t one128[2]; @@ -1260,7 +1260,7 @@ gf_w128_inverse_from_divide(GFP gf, gf_val_128_t a128, gf_val_128_t b128) static - void +void gf_w128_composite_inverse(gf_t *gf, gf_val_128_t 
a, gf_val_128_t inv) { gf_internal_t *h = (gf_internal_t *) gf->scratch; @@ -1421,10 +1421,12 @@ static int gf_w128_cfm_init(gf_t *gf) { #if defined(INTEL_SSE4_PCLMUL) - SET_FUNCTION(gf,inverse,w128,gf_w128_euclid) - SET_FUNCTION(gf,multiply,w128,gf_w128_clm_multiply) - SET_FUNCTION(gf,multiply_region,w128,gf_w128_clm_multiply_region_from_single) - return 1; + if (gf_cpu_supports_intel_pclmul) { + SET_FUNCTION(gf,inverse,w128,gf_w128_euclid) + SET_FUNCTION(gf,multiply,w128,gf_w128_clm_multiply) + SET_FUNCTION(gf,multiply_region,w128,gf_w128_clm_multiply_region_from_single) + return 1; + } #endif return 0; @@ -1527,7 +1529,7 @@ int gf_w128_split_init(gf_t *gf) SET_FUNCTION(gf,multiply,w128,gf_w128_bytwo_p_multiply) #if defined(INTEL_SSE4_PCLMUL) - if (!(h->region_type & GF_REGION_NOSIMD)){ + if (gf_cpu_supports_intel_pclmul && !(h->region_type & GF_REGION_NOSIMD)){ SET_FUNCTION(gf,multiply,w128,gf_w128_clm_multiply) } #endif @@ -1546,23 +1548,19 @@ int gf_w128_split_init(gf_t *gf) if((h->region_type & GF_REGION_ALTMAP)) { #ifdef INTEL_SSE4 - if(!(h->region_type & GF_REGION_NOSIMD)) + if(gf_cpu_supports_intel_sse4 && !(h->region_type & GF_REGION_NOSIMD)) SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_sse_altmap_multiply_region) else - return 0; - #else - return 0; #endif + return 0; } else { #ifdef INTEL_SSE4 - if(!(h->region_type & GF_REGION_NOSIMD)) + if(gf_cpu_supports_intel_sse4 && !(h->region_type & GF_REGION_NOSIMD)) SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_sse_multiply_region) else - SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_multiply_region) - #else - SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_multiply_region) #endif + SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_multiply_region) } } return 1; diff --git a/src/gf_w16.c b/src/gf_w16.c index a62ea51..8316892 100644 --- a/src/gf_w16.c +++ b/src/gf_w16.c @@ -12,6 +12,7 @@ #include #include #include "gf_w16.h" +#include "gf_cpu.h" #define AB2(ip, 
am1 ,am2, b, t1, t2) {\ t1 = (b << 1) & am1;\ @@ -391,6 +392,7 @@ gf_val_32_t gf_w16_matrix (gf_t *gf, gf_val_32_t b) extra memory. */ +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -398,8 +400,6 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -433,11 +433,11 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); - -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -445,8 +445,6 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -473,11 +471,11 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); - -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -485,8 +483,6 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -515,10 +511,9 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); - -#endif return rv; } +#endif static @@ -556,25 +551,27 @@ static int gf_w16_cfm_init(gf_t *gf) { #if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; + if (gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; - h = (gf_internal_t *) gf->scratch; - - /*Ben: Determining how many reductions to do */ - - if ((0xfe00 & h->prim_poly) == 0) { - SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_2) - SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_2) - } else if((0xf000 & h->prim_poly) == 0) { - SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_3) - 
SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_3) - } else if ((0xe000 & h->prim_poly) == 0) { - SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_4) - SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_4) - } else { - return 0; - } - return 1; + h = (gf_internal_t *) gf->scratch; + + /*Ben: Determining how many reductions to do */ + + if ((0xfe00 & h->prim_poly) == 0) { + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_2) + } else if((0xf000 & h->prim_poly) == 0) { + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_3) + } else if ((0xe000 & h->prim_poly) == 0) { + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_4) + } else { + return 0; + } + return 1; + } #endif return 0; @@ -688,10 +685,9 @@ int gf_w16_log_init(gf_t *gf) if (check) { if (h->mult_type != GF_MULT_LOG_TABLE) { - -#if defined(INTEL_SSE4_PCLMUL) - return gf_w16_cfm_init(gf); -#endif + if (gf_cpu_supports_intel_pclmul) { + return gf_w16_cfm_init(gf); + } return gf_w16_shift_init(gf); } else { _gf_errno = GF_E_LOGPOLY; @@ -948,11 +944,11 @@ gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v gf_do_final_region_alignment(&rd); } +#ifdef INTEL_SSSE3 static void gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) { -#ifdef INTEL_SSSE3 uint64_t i, j, *s64, *d64, *top64;; uint64_t c, prod; uint8_t low[4][16]; @@ -1078,14 +1074,14 @@ gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_v } gf_do_final_region_alignment(&rd); -#endif } +#endif +#ifdef INTEL_SSSE3 static void gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) { -#ifdef 
INTEL_SSSE3 uint64_t i, j, *s64, *d64, *top64;; uint64_t c, prod; uint8_t low[4][16]; @@ -1187,8 +1183,8 @@ gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des } gf_do_final_region_alignment(&rd); -#endif } +#endif uint32_t gf_w16_split_8_8_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) @@ -1216,21 +1212,11 @@ int gf_w16_split_init(gf_t *gf) { gf_internal_t *h; struct gf_w16_split_8_8_data *d8; - int i, j, exp, issse3; - int isneon = 0; + int i, j, exp; uint32_t p, basep, tmp; h = (gf_internal_t *) gf->scratch; -#ifdef INTEL_SSSE3 - issse3 = 1; -#else - issse3 = 0; -#endif -#ifdef ARM_NEON - isneon = 1; -#endif - if (h->arg1 == 8 && h->arg2 == 8) { d8 = (struct gf_w16_split_8_8_data *) h->private; basep = 1; @@ -1273,36 +1259,45 @@ int gf_w16_split_init(gf_t *gf) /* Defaults */ - if (issse3) { +#ifdef INTEL_SSSE3 + if (gf_cpu_supports_intel_ssse3) { SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_multiply_region) - } else if (isneon) { -#ifdef ARM_NEON + } else { +#elif ARM_NEON + if (gf_cpu_supports_arm_neon) { gf_w16_neon_split_init(gf); -#endif } else { +#endif SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) +#if defined(INTEL_SSSE3) || defined(ARM_NEON) } - +#endif if ((h->arg1 == 8 && h->arg2 == 16) || (h->arg2 == 8 && h->arg1 == 16)) { SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) } else if ((h->arg1 == 4 && h->arg2 == 16) || (h->arg2 == 4 && h->arg1 == 16)) { - if (issse3 || isneon) { +#if defined(INTEL_SSSE3) || defined(ARM_NEON) + if (gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) { if(h->region_type & GF_REGION_ALTMAP && h->region_type & GF_REGION_NOSIMD) SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region) else if(h->region_type & GF_REGION_NOSIMD) SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region) - else if(h->region_type & GF_REGION_ALTMAP && issse3) +#if defined(INTEL_SSSE3) + 
else if(h->region_type & GF_REGION_ALTMAP && gf_cpu_supports_intel_ssse3) SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_altmap_multiply_region) +#endif } else { +#endif if(h->region_type & GF_REGION_SIMD) return 0; else if(h->region_type & GF_REGION_ALTMAP) SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region) else SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region) +#if defined(INTEL_SSSE3) || defined(ARM_NEON) } +#endif } return 1; @@ -1846,26 +1841,28 @@ int gf_w16_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_p_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_nosse_multiply_region) - else - SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_sse_multiply_region) - #else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_sse_multiply_region) + } else { + #endif SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSE2 + } #endif } else { SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_b_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_sse_multiply_region) - #else + } else { + #endif SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSE2 + } #endif } diff --git a/src/gf_w32.c b/src/gf_w32.c index d496c3a..bb22894 100644 --- a/src/gf_w32.c +++ b/src/gf_w32.c @@ -13,6 +13,7 @@ #include #include #include "gf_w32.h" +#include "gf_cpu.h" #define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); 
_mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); } @@ -347,6 +348,8 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b) extra memory. */ +#if defined(INTEL_SSE4_PCLMUL) + static inline gf_val_32_t @@ -354,8 +357,6 @@ gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i w; @@ -378,9 +379,9 @@ gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif #if defined(INTEL_SSE4_PCLMUL) @@ -435,6 +436,8 @@ gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32 #endif +#if defined(INTEL_SSE4_PCLMUL) + static inline gf_val_32_t @@ -442,8 +445,6 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -476,9 +477,11 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif + +#if defined(INTEL_SSE4_PCLMUL) static inline @@ -487,8 +490,6 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -515,9 +516,11 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. 
*/ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif + +#if defined(INTEL_SSE4_PCLMUL) static inline @@ -526,8 +529,6 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -556,9 +557,9 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif static @@ -593,29 +594,31 @@ int gf_w32_cfmgk_init(gf_t *gf) SET_FUNCTION(gf,multiply_region,w32,gf_w32_multiply_region_from_single) #if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; + if (gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; - h = (gf_internal_t *) gf->scratch; - SET_FUNCTION(gf,multiply,w32,gf_w32_cfmgk_multiply) - SET_FUNCTION(gf,multiply_region,w32,gf_w32_cfmgk_multiply_region_from_single) + h = (gf_internal_t *) gf->scratch; + SET_FUNCTION(gf,multiply,w32,gf_w32_cfmgk_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_cfmgk_multiply_region_from_single) - uint64_t *q_plus = (uint64_t *) h->private; - uint64_t *g_star = (uint64_t *) h->private + 1; + uint64_t *q_plus = (uint64_t *) h->private; + uint64_t *g_star = (uint64_t *) h->private + 1; - uint64_t tmp = h->prim_poly << 32; - *q_plus = 1ULL << 32; + uint64_t tmp = h->prim_poly << 32; + *q_plus = 1ULL << 32; - int i; - for(i = 63; i >= 32; i--) - if((1ULL << i) & tmp) - { - *q_plus |= 1ULL << (i-32); - tmp ^= h->prim_poly << (i-32); - } + int i; + for(i = 63; i >= 32; i--) + if((1ULL << i) & tmp) + { + *q_plus |= 1ULL << (i-32); + tmp ^= h->prim_poly << (i-32); + } - *g_star = h->prim_poly & ((1ULL << 32) - 1); + *g_star = h->prim_poly & ((1ULL << 32) - 1); - return 1; + return 1; + } #endif return 0; @@ -631,23 +634,25 @@ int gf_w32_cfm_init(gf_t *gf) /*Ben: Check to see how many reduction steps it will take*/ #if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; + if 
(gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; - h = (gf_internal_t *) gf->scratch; + h = (gf_internal_t *) gf->scratch; - if ((0xfffe0000 & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2) - SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_2) - }else if ((0xffc00000 & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3) - SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_3) - }else if ((0xfe000000 & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4) - SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_4) - } else { - return 0; + if ((0xfffe0000 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_2) + }else if ((0xffc00000 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_3) + }else if ((0xfe000000 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w32_clm_multiply_region_from_single_4) + } else { + return 0; + } + return 1; } - return 1; #endif return 0; @@ -1382,26 +1387,28 @@ int gf_w32_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_sse_multiply_region) - #else - SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_nosse_multiply_region) - if(h->region_type & GF_REGION_SIMD) - return 0; + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_p_nosse_multiply_region) + if(h->region_type & 
GF_REGION_SIMD) + return 0; + #ifdef INTEL_SSE2 + } #endif } else { SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_b_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_sse_multiply_region) - #else + } else { + #endif SET_FUNCTION(gf,multiply_region,w32,gf_w32_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSE2 + } #endif } @@ -1755,11 +1762,11 @@ gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t gf_do_final_region_alignment(&rd); } +#ifdef INTEL_SSSE3 static void gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) { -#ifdef INTEL_SSSE3 gf_internal_t *h; int i, j, k; uint32_t pp, v, *s32, *d32, *top; @@ -1942,16 +1949,15 @@ gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des } gf_do_final_region_alignment(&rd); - -#endif } +#endif +#ifdef INTEL_SSSE3 static void gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) { -#ifdef INTEL_SSSE3 gf_internal_t *h; int i, j, k; uint32_t pp, v, *s32, *d32, *top, tmp_table[16]; @@ -2216,9 +2222,8 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint } } gf_do_final_region_alignment(&rd); - -#endif } +#endif static int gf_w32_split_init(gf_t *gf) @@ -2230,23 +2235,7 @@ int gf_w32_split_init(gf_t *gf) struct gf_split_8_32_lazy_data *d32; struct gf_split_16_32_lazy_data *d16; uint32_t p, basep; - int i, j, exp, ispclmul, issse3; - int isneon = 0; - -#if defined(INTEL_SSE4_PCLMUL) - ispclmul = 1; -#else - ispclmul = 0; -#endif - -#ifdef INTEL_SSSE3 - issse3 = 1; -#else - issse3 = 0; -#endif -#ifdef ARM_NEON - isneon = 1; -#endif + int i, j, exp; h = 
(gf_internal_t *) gf->scratch; @@ -2262,7 +2251,8 @@ int gf_w32_split_init(gf_t *gf) if (h->arg1 == 8 && h->arg2 == 8) { SET_FUNCTION(gf,multiply,w32,gf_w32_split_8_8_multiply) - } else if (ispclmul) { +#if defined(INTEL_SSE4_PCLMUL) + } else if (gf_cpu_supports_intel_pclmul) { if ((0xfffe0000 & h->prim_poly) == 0){ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_2) } else if ((0xffc00000 & h->prim_poly) == 0){ @@ -2270,6 +2260,7 @@ int gf_w32_split_init(gf_t *gf) } else if ((0xfe000000 & h->prim_poly) == 0){ SET_FUNCTION(gf,multiply,w32,gf_w32_clm_multiply_4) } +#endif } else { SET_FUNCTION(gf,multiply,w32,gf_w32_bytwo_p_multiply) } @@ -2287,33 +2278,39 @@ int gf_w32_split_init(gf_t *gf) ld2 = (struct gf_split_2_32_lazy_data *) h->private; ld2->last_value = 0; #ifdef INTEL_SSSE3 - if (!(h->region_type & GF_REGION_NOSIMD)) + if (gf_cpu_supports_intel_ssse3 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_sse_multiply_region) - else + } else { + #endif SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_multiply_region) - #else - SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_2_32_lazy_multiply_region) - if(h->region_type & GF_REGION_SIMD) return 0; + if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSSE3 + } #endif return 1; } /* 4/32 or Default + SSE - There is no ALTMAP/NOSSE. 
*/ + if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4) || - ((issse3 || isneon) && h->mult_type == GF_REGION_DEFAULT)) { + ((gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) && h->mult_type == GF_REGION_DEFAULT)) { ld4 = (struct gf_split_4_32_lazy_data *) h->private; ld4->last_value = 0; - if ((h->region_type & GF_REGION_NOSIMD) || !(issse3 || isneon)) { + if ((h->region_type & GF_REGION_NOSIMD) || !(gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon)) { SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_multiply_region) - } else if (isneon) { + } else if (gf_cpu_supports_arm_neon) { #ifdef ARM_NEON gf_w32_neon_split_init(gf); #endif } else if (h->region_type & GF_REGION_ALTMAP) { +#ifdef INTEL_SSSE3 SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_altmap_multiply_region) +#endif } else { +#ifdef INTEL_SSSE3 SET_FUNCTION(gf,multiply_region,w32,gf_w32_split_4_32_lazy_sse_multiply_region) +#endif } return 1; } @@ -2686,16 +2683,6 @@ int gf_w32_composite_init(gf_t *gf) int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) { - int issse3 = 0; - int isneon = 0; - -#ifdef INTEL_SSSE3 - issse3 = 1; -#endif -#ifdef ARM_NEON - isneon = 1; -#endif - switch(mult_type) { case GF_MULT_BYTWO_p: @@ -2720,7 +2707,7 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64; } if ((arg1 == 8 && arg2 == 32) || (arg2 == 8 && arg1 == 32) || - (mult_type == GF_MULT_DEFAULT && !(issse3 || isneon))) { + (mult_type == GF_MULT_DEFAULT && !(gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon))) { return sizeof(gf_internal_t) + sizeof(struct gf_split_8_32_lazy_data) + 64; } if ((arg1 == 4 && arg2 == 32) || diff --git a/src/gf_w4.c b/src/gf_w4.c index 814b0f5..3a7b953 100644 --- a/src/gf_w4.c +++ b/src/gf_w4.c @@ -12,6 +12,7 @@ #include #include #include "gf_w4.h" +#include "gf_cpu.h" #define 
AB2(ip, am1 ,am2, b, t1, t2) {\ t1 = (b << 1) & am1;\ @@ -134,6 +135,7 @@ gf_w4_shift_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) /* Ben: This function works, but it is 33% slower than the normal shift mult */ +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -141,8 +143,6 @@ gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -173,9 +173,9 @@ gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif static void @@ -447,18 +447,19 @@ int gf_w4_single_table_init(gf_t *gf) SET_FUNCTION(gf,inverse,w32,NULL) SET_FUNCTION(gf,divide,w32,gf_w4_single_table_divide) SET_FUNCTION(gf,multiply,w32,gf_w4_single_table_multiply) - #if defined(INTEL_SSSE3) || defined(ARM_NEON) - if(h->region_type & (GF_REGION_NOSIMD | GF_REGION_CAUCHY)) - SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_multiply_region) - else - #if defined(INTEL_SSSE3) + #if defined(INTEL_SSSE3) + if (gf_cpu_supports_intel_ssse3 && !(h->region_type & (GF_REGION_NOSIMD | GF_REGION_CAUCHY))) { SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_sse_multiply_region) - #elif defined(ARM_NEON) + } else { + #elif defined(ARM_NEON) + if (gf_cpu_supports_arm_neon && !(h->region_type & (GF_REGION_NOSIMD | GF_REGION_CAUCHY))) { gf_w4_neon_single_table_init(gf); - #endif - #else - SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_multiply_region) - if (h->region_type & GF_REGION_SIMD) return 0; + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_multiply_region) + if (h->region_type & GF_REGION_SIMD) return 0; + #if defined(INTEL_SSSE3) || defined(ARM_NEON) + } #endif return 1; @@ -736,16 +737,13 @@ int gf_w4_table_init(gf_t *gf) { int rt; gf_internal_t *h; - int simd = 0; - -#if defined(INTEL_SSSE3) || defined(ARM_NEON) - simd = 
1; -#endif h = (gf_internal_t *) gf->scratch; rt = (h->region_type); - if (h->mult_type == GF_MULT_DEFAULT && !simd) rt |= GF_REGION_DOUBLE_TABLE; + if (h->mult_type == GF_MULT_DEFAULT && + !(gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon)) + rt |= GF_REGION_DOUBLE_TABLE; if (rt & GF_REGION_DOUBLE_TABLE) { return gf_w4_double_table_init(gf); @@ -929,11 +927,11 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v #endif /* +#ifdef INTEL_SSE2 static void gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) { -#ifdef INTEL_SSE2 uint8_t *d8, *s8, tb; __m128i pp, m1, m2, t1, t2, va, vb; struct gf_bytwo_data *btd; @@ -990,8 +988,8 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v } } gf_do_final_region_alignment(&rd); -#endif } +#endif */ #ifdef INTEL_SSE2 @@ -1867,26 +1865,28 @@ int gf_w4_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { SET_FUNCTION(gf,multiply,w32,gf_w4_bytwo_p_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_p_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_p_sse_multiply_region) - #else - SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_p_nosse_multiply_region) - if (h->region_type & GF_REGION_SIMD) - return 0; + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_p_nosse_multiply_region) + if (h->region_type & GF_REGION_SIMD) + return 0; + #ifdef INTEL_SSE2 + } #endif } else { SET_FUNCTION(gf,multiply,w32,gf_w4_bytwo_b_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_b_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_b_sse_multiply_region) - #else - 
SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_b_nosse_multiply_region) - if (h->region_type & GF_REGION_SIMD) - return 0; + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w4_bytwo_b_nosse_multiply_region) + if (h->region_type & GF_REGION_SIMD) + return 0; + #ifdef INTEL_SSE2 + } #endif } return 1; @@ -1897,10 +1897,14 @@ static int gf_w4_cfm_init(gf_t *gf) { #if defined(INTEL_SSE4_PCLMUL) - SET_FUNCTION(gf,multiply,w32,gf_w4_clm_multiply) - return 1; + if (gf_cpu_supports_intel_pclmul) { + SET_FUNCTION(gf,multiply,w32,gf_w4_clm_multiply) + return 1; + } #elif defined(ARM_NEON) - return gf_w4_neon_cfm_init(gf); + if (gf_cpu_supports_arm_neon) { + return gf_w4_neon_cfm_init(gf); + } #endif return 0; } @@ -1917,15 +1921,6 @@ int gf_w4_shift_init(gf_t *gf) int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) { - int issse3 = 0, isneon = 0; - -#ifdef INTEL_SSSE3 - issse3 = 1; -#endif -#ifdef ARM_NEON - isneon = 1; -#endif - switch(mult_type) { case GF_MULT_BYTWO_p: @@ -1938,7 +1933,8 @@ int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1 return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64; } - if (mult_type == GF_MULT_DEFAULT && !(issse3 || isneon)) + if (mult_type == GF_MULT_DEFAULT && + !(gf_cpu_supports_arm_neon || gf_cpu_supports_intel_ssse3)) region_type = GF_REGION_DOUBLE_TABLE; if (region_type & GF_REGION_DOUBLE_TABLE) { diff --git a/src/gf_w64.c b/src/gf_w64.c index a096161..69e55db 100644 --- a/src/gf_w64.c +++ b/src/gf_w64.c @@ -12,6 +12,7 @@ #include #include #include "gf_w64.h" +#include "gf_cpu.h" static inline @@ -338,6 +339,8 @@ gf_w64_shift_multiply (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) * ELM: Use the Intel carryless multiply instruction to do very fast 64x64 multiply. 
*/ +#if defined(INTEL_SSE4_PCLMUL) + static inline gf_val_64_t @@ -345,8 +348,6 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) { gf_val_64_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -376,10 +377,12 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) result = _mm_xor_si128 (result, w); rv = ((gf_val_64_t)_mm_extract_epi64(result, 0)); -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) + static inline gf_val_64_t @@ -387,8 +390,6 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) { gf_val_64_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -418,15 +419,15 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) result = _mm_xor_si128 (result, w); rv = ((gf_val_64_t)_mm_extract_epi64(result, 0)); -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) void gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor) { -#if defined(INTEL_SSE4_PCLMUL) gf_internal_t *h; uint8_t *s8, *d8, *dtop; gf_region_data rd; @@ -504,8 +505,8 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by } } gf_do_final_region_alignment(&rd); -#endif } +#endif void gf_w64_split_4_64_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor) @@ -709,21 +710,23 @@ int gf_w64_cfm_init(gf_t *gf) SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) SET_FUNCTION(gf,multiply_region,w64,gf_w64_multiply_region_from_single) -#if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; +#if defined(INTEL_SSE4_PCLMUL) + if (gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; - h = (gf_internal_t *) gf->scratch; + h = (gf_internal_t *) gf->scratch; - if ((0xfffffffe00000000ULL & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_2) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_2) - }else 
if((0xfffe000000000000ULL & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_4) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_4) - } else { - return 0; + if ((0xfffffffe00000000ULL & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_2) + }else if((0xfffe000000000000ULL & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_4) + } else { + return 0; + } + return 1; } - return 1; #endif return 0; @@ -1261,9 +1264,9 @@ gf_w64_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_ v = _mm_srli_epi64(v, 1); } +#ifdef INTEL_SSE2 void gf_w64_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor) { -#ifdef INTEL_SSE2 int i; uint8_t *s8, *d8; uint64_t vrev, one64; @@ -1322,8 +1325,8 @@ void gf_w64_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_ s8 += 16; } gf_do_final_region_alignment(&rd); -#endif } +#endif #ifdef INTEL_SSE2 static @@ -1457,26 +1460,28 @@ int gf_w64_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { SET_FUNCTION(gf,multiply,w64,gf_w64_bytwo_p_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_p_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_p_sse_multiply_region) - #else - SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_p_nosse_multiply_region) - if(h->region_type & GF_REGION_SIMD) - return 0; + } else { + #endif + SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_p_nosse_multiply_region) + if(h->region_type & GF_REGION_SIMD) + return 0; + #ifdef INTEL_SSE2 + } #endif } else { SET_FUNCTION(gf,multiply,w64,gf_w64_bytwo_b_multiply) #ifdef 
INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_b_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_b_sse_multiply_region) - #else + } else { + #endif SET_FUNCTION(gf,multiply_region,w64,gf_w64_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSE2 + } #endif } SET_FUNCTION(gf,inverse,w64,gf_w64_euclid) @@ -1975,18 +1980,20 @@ int gf_w64_split_init(gf_t *gf) SET_FUNCTION(gf,multiply,w64,gf_w64_bytwo_p_multiply) #if defined(INTEL_SSE4_PCLMUL) - if ((!(h->region_type & GF_REGION_NOSIMD) && - (h->arg1 == 64 || h->arg2 == 64)) || - h->mult_type == GF_MULT_DEFAULT){ - - if ((0xfffffffe00000000ULL & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_2) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_2) - }else if((0xfffe000000000000ULL & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_4) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_4) - }else{ - return 0; + if (gf_cpu_supports_intel_pclmul) { + if ((!(h->region_type & GF_REGION_NOSIMD) && + (h->arg1 == 64 || h->arg2 == 64)) || + h->mult_type == GF_MULT_DEFAULT){ + + if ((0xfffffffe00000000ULL & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_2) + }else if((0xfffe000000000000ULL & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w64,gf_w64_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_clm_multiply_region_from_single_4) + }else{ + return 0; + } } } #endif @@ -1996,23 +2003,27 @@ int gf_w64_split_init(gf_t *gf) /* Allen: set region pointers for default mult type. Single pointers are * taken care of above (explicitly for sse, implicitly for no sse). 
*/ -#if defined(INTEL_SSE4) || defined(ARCH_AARCH64) if (h->mult_type == GF_MULT_DEFAULT) { - d4 = (struct gf_split_4_64_lazy_data *) h->private; - d4->last_value = 0; +#if defined(INTEL_SSE4) || defined(ARCH_AARCH64) + if (gf_cpu_supports_intel_sse4 || gf_cpu_supports_arm_neon) { + d4 = (struct gf_split_4_64_lazy_data *) h->private; + d4->last_value = 0; #if defined(INTEL_SSE4) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_multiply_region) + if (gf_cpu_supports_intel_sse4) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_multiply_region) #elif defined(ARCH_AARCH64) - gf_w64_neon_split_init(gf); + if (gf_cpu_supports_arm_neon) + gf_w64_neon_split_init(gf); #endif - } -#else - if (h->mult_type == GF_MULT_DEFAULT) { - d8 = (struct gf_split_8_64_lazy_data *) h->private; - d8->last_value = 0; - SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_8_64_lazy_multiply_region) - } + } else { +#endif + d8 = (struct gf_split_8_64_lazy_data *) h->private; + d8->last_value = 0; + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_8_64_lazy_multiply_region) +#if defined(INTEL_SSE4) || defined(ARCH_AARCH64) + } #endif + } if ((h->arg1 == 4 && h->arg2 == 64) || (h->arg1 == 64 && h->arg2 == 4)) { d4 = (struct gf_split_4_64_lazy_data *) h->private; @@ -2022,28 +2033,35 @@ int gf_w64_split_init(gf_t *gf) if(h->region_type & GF_REGION_ALTMAP) { #ifdef INTEL_SSSE3 - SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_altmap_multiply_region) + if (gf_cpu_supports_intel_ssse3) { + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_altmap_multiply_region) + } else #elif defined(ARCH_AARCH64) - gf_w64_neon_split_init(gf); - #else - return 0; + if (gf_cpu_supports_arm_neon) { + gf_w64_neon_split_init(gf); + } else #endif + return 0; } else //no altmap { #if defined(INTEL_SSE4) || defined(ARCH_AARCH64) - if(h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_multiply_region) - else - #if 
defined(INTEL_SSE4) - SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_multiply_region) - #elif defined(ARCH_AARCH64) - gf_w64_neon_split_init(gf); - #endif - #else + if(gf_cpu_supports_intel_sse4 || gf_cpu_supports_arm_neon) { + if (h->region_type & GF_REGION_NOSIMD) { + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_multiply_region) + } else + #if defined(INTEL_SSE4) + SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_sse_multiply_region) + #elif defined(ARCH_AARCH64) + gf_w64_neon_split_init(gf); + #endif + } else { + #endif SET_FUNCTION(gf,multiply_region,w64,gf_w64_split_4_64_lazy_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #if defined(INTEL_SSE4) || defined(ARCH_AARCH64) + } #endif } } @@ -2114,11 +2132,15 @@ int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg * then fall through to split table scratch size code. */ #if defined(INTEL_SSE4) || defined(ARCH_AARCH64) + if (gf_cpu_supports_intel_sse4 || gf_cpu_supports_arm_neon) { arg1 = 64; arg2 = 4; -#else + } else { +#endif arg1 = 64; arg2 = 8; +#if defined(INTEL_SSE4) || defined(ARCH_AARCH64) + } #endif case GF_MULT_SPLIT_TABLE: diff --git a/src/gf_w8.c b/src/gf_w8.c index 81a0eba..f647a31 100644 --- a/src/gf_w8.c +++ b/src/gf_w8.c @@ -13,6 +13,7 @@ #include #include #include +#include "gf_cpu.h" #define AB2(ip, am1 ,am2, b, t1, t2) {\ t1 = (b << 1) & am1;\ @@ -127,6 +128,7 @@ uint32_t gf_w8_matrix (gf_t *gf, uint32_t b) } +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -134,8 +136,6 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -169,10 +169,11 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -180,8 +181,6 @@ gf_w8_clm_multiply_3 
(gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -208,10 +207,11 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -219,8 +219,6 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -248,9 +246,9 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) /* Extracts 32 bit value from result. */ rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); -#endif return rv; } +#endif static @@ -509,25 +507,29 @@ static int gf_w8_cfm_init(gf_t *gf) { #if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; - - h = (gf_internal_t *) gf->scratch; - - if ((0xe0 & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_2) - SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_2) - }else if ((0xc0 & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_3) - SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_3) - }else if ((0x80 & h->prim_poly) == 0){ - SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_4) - SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_4) - }else{ - return 0; - } - return 1; + if (gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + + if ((0xe0 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_2) + }else if ((0xc0 & h->prim_poly) == 0){ + SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_3) + }else if ((0x80 & h->prim_poly) == 0){ + 
SET_FUNCTION(gf,multiply,w32,gf_w8_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w8_clm_multiply_region_from_single_4) + }else{ + return 0; + } + return 1; + } #elif defined(ARM_NEON) - return gf_w8_neon_cfm_init(gf); + if (gf_cpu_supports_arm_neon) { + return gf_w8_neon_cfm_init(gf); + } #endif return 0; @@ -1103,20 +1105,21 @@ int gf_w8_split_init(gf_t *gf) } SET_FUNCTION(gf,multiply,w32,gf_w8_split_multiply) - - #if defined(INTEL_SSSE3) || defined(ARM_NEON) - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region) - else - #if defined(INTEL_SSSE3) + + #if defined(INTEL_SSSE3) + if (gf_cpu_supports_intel_ssse3 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region_sse) - #elif defined(ARM_NEON) + } else { + #elif defined(ARM_NEON) + if (gf_cpu_supports_arm_neon && !(h->region_type & GF_REGION_NOSIMD)) { gf_w8_neon_split_init(gf); - #endif - #else + } else { + #endif SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #if defined(INTEL_SSSE3) || defined(ARM_NEON) + } #endif return 1; @@ -1134,17 +1137,12 @@ int gf_w8_table_init(gf_t *gf) struct gf_w8_double_table_data *dtd = NULL; struct gf_w8_double_table_lazy_data *ltd = NULL; struct gf_w8_default_data *dd = NULL; - int a, b, c, prod, scase, use_simd; + int a, b, c, prod, scase; h = (gf_internal_t *) gf->scratch; -#if defined(INTEL_SSSE3) || defined(ARM_NEON) - use_simd = 1; -#else - use_simd = 0; -#endif - - if (h->mult_type == GF_MULT_DEFAULT && use_simd) { + if (h->mult_type == GF_MULT_DEFAULT && + (gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon)) { dd = (struct gf_w8_default_data *)h->private; scase = 3; bzero(dd->high, sizeof(uint8_t) * GF_FIELD_SIZE * GF_HALF_SIZE); @@ -1220,13 +1218,19 @@ int gf_w8_table_init(gf_t *gf) break; case 3: #if defined(INTEL_SSSE3) || defined(ARM_NEON) - 
SET_FUNCTION(gf,divide,w32,gf_w8_default_divide) - SET_FUNCTION(gf,multiply,w32,gf_w8_default_multiply) + if (gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) { + SET_FUNCTION(gf,divide,w32,gf_w8_default_divide) + SET_FUNCTION(gf,multiply,w32,gf_w8_default_multiply) #if defined(INTEL_SSSE3) - SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region_sse) + if (gf_cpu_supports_intel_ssse3) { + SET_FUNCTION(gf,multiply_region,w32,gf_w8_split_multiply_region_sse) + } #elif defined(ARM_NEON) - gf_w8_neon_split_init(gf); + if (gf_cpu_supports_arm_neon) { + gf_w8_neon_split_init(gf); + } #endif + } #endif break; } @@ -2192,26 +2196,28 @@ int gf_w8_bytwo_init(gf_t *gf) if (h->mult_type == GF_MULT_BYTWO_p) { SET_FUNCTION(gf,multiply,w32,gf_w8_bytwo_p_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_p_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_p_sse_multiply_region) -#else - SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_p_nosse_multiply_region) - if(h->region_type & GF_REGION_SIMD) - return 0; + } else { +#endif + SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_p_nosse_multiply_region) + if(h->region_type & GF_REGION_SIMD) + return 0; +#ifdef INTEL_SSE2 + } #endif } else { SET_FUNCTION(gf,multiply,w32,gf_w8_bytwo_b_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_b_nosse_multiply_region) - else + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_b_sse_multiply_region) -#else + } else { +#endif SET_FUNCTION(gf,multiply_region,w32,gf_w8_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; +#ifdef INTEL_SSE2 + } #endif } return 1; @@ -2229,9 +2235,9 @@ int gf_w8_scratch_size(int mult_type, int region_type, int 
divide_type, int arg1 switch(mult_type) { case GF_MULT_DEFAULT: -#if defined(INTEL_SSSE3) || defined(ARM_NEON) - return sizeof(gf_internal_t) + sizeof(struct gf_w8_default_data) + 64; -#endif + if (gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) { + return sizeof(gf_internal_t) + sizeof(struct gf_w8_default_data) + 64; + } return sizeof(gf_internal_t) + sizeof(struct gf_w8_single_table_data) + 64; case GF_MULT_TABLE: if (region_type == GF_REGION_CAUCHY) { diff --git a/tools/test_simd.sh b/tools/test_simd.sh index 1268f87..1b0e319 100755 --- a/tools/test_simd.sh +++ b/tools/test_simd.sh @@ -118,6 +118,237 @@ test_compile() { esac } +# disable through build flags +runtime_arm_flags() { + failed=0 + + echo "====NO SIMD support..." >> ${1} + { ./configure --disable-neon && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." >> ${1} + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +# build once with FULL SIMD and disable at runtime through environment +runtime_arm_env() { + failed=0 + + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + + echo "====NO SIMD support..." >> ${1} + export GF_COMPLETE_DISABLE_NEON=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." 
>> ${1} + unset GF_COMPLETE_DISABLE_NEON + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +runtime_intel_flags() { + failed=0 + + echo "====NO SIMD support..." >> ${1} + { ./configure --disable-sse && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=no + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3 support..." 
>> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_1 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=yes + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_2 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=yes + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." 
>> ${1} + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +runtime_intel_env() { + failed=0 + + # compile a build with full SIMD support + { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + + echo "====NO SIMD support..." >> ${1} + export GF_COMPLETE_DISABLE_SSE2=1 + export GF_COMPLETE_DISABLE_SSE3=1 + export GF_COMPLETE_DISABLE_SSSE3=1 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + export GF_COMPLETE_DISABLE_SSE3=1 + export GF_COMPLETE_DISABLE_SSSE3=1 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + export GF_COMPLETE_DISABLE_SSSE3=1 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3 support..." 
>> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_1 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + unset GF_COMPLETE_DISABLE_SSE4 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_2 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + unset GF_COMPLETE_DISABLE_SSE4 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." 
>> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + unset GF_COMPLETE_DISABLE_SSE4 + unset GF_COMPLETE_DISABLE_SSE4_PCLMUL + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +test_runtime() { + rm -f ${results}.left + rm -f ${results}.right + + case $host_cpu in + aarch64*|arm*) + runtime_arm_flags ${results}.left + runtime_arm_env ${results}.right + ;; + i[3456]86*|x86_64*|amd64*) + runtime_intel_flags ${results}.left + runtime_intel_env ${results}.right + ;; + esac + + echo "======LEFT======" > ${results} + cat ${results}.left >> ${results} + echo "======RIGHT======" >> ${results} + cat ${results}.right >> ${results} + echo "======RESULT======" >> ${results} + if diff "${results}.left" "${results}.right"; then + echo SUCCESS >> ${results} + return 0 + else + echo FAILED >> ${results} + return 1 + fi +} + cd ${script_dir}/.. rm -f ${results} diff --git a/tools/test_simd_qemu.sh b/tools/test_simd_qemu.sh index a270e20..7b2cb1c 100755 --- a/tools/test_simd_qemu.sh +++ b/tools/test_simd_qemu.sh @@ -224,6 +224,8 @@ run_test_simd_basic() { { run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } echo "=====running functions test" { run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + echo "=====running runtime test" + { run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } stop_qemu return ${failed} -- cgit v1.2.1 From ad11042132c7db78e8ae57a364c37df74572e8b6 Mon Sep 17 00:00:00 2001 From: Bassam Tabbara Date: Tue, 6 Sep 2016 23:48:39 -0700 Subject: Simplify SIMD make scripts ax_ext.m4 no longer performs any CPU checks. Instead it just checks if the the compile supports SIMD flags. Runtime detection will choose the right methods base on CPU instructions available.
Intel AVX support is still done through the build since it would require a major refactoring of the code base to support it at runtime. For now I added a configuration flag --enable-avx that can be used to compile with AVX support. Also use cpu intrinsics instead of __asm__ --- configure.ac | 9 ++ m4/ax_ext.m4 | 295 +++++--------------------------------------- m4/ax_gcc_x86_avx_xgetbv.m4 | 79 ------------ m4/ax_gcc_x86_cpuid.m4 | 79 ------------ src/gf_cpu.c | 33 +++-- test/Makefile.am | 2 +- tools/Makefile.am | 2 +- tools/test_simd.sh | 12 +- tools/test_simd_qemu.sh | 2 + 9 files changed, 79 insertions(+), 434 deletions(-) delete mode 100644 m4/ax_gcc_x86_avx_xgetbv.m4 delete mode 100644 m4/ax_gcc_x86_cpuid.m4 diff --git a/configure.ac b/configure.ac index 3e8cf18..3beea03 100644 --- a/configure.ac +++ b/configure.ac @@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind], [enable_valgrind=no]) AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno) +AC_ARG_ENABLE([avx], AS_HELP_STRING([--enable-avx], [Build with AVX optimizations])) +AX_CHECK_COMPILE_FLAG(-mavx, [ax_cv_support_avx=yes], []) + +AS_IF([test "x$enable_avx" = "xyes"], + [AS_IF([test "x$ax_cv_support_avx" = "xno"], + [AC_MSG_ERROR([AVX requested but compiler does not support -mavx])], + [SIMD_FLAGS="$SIMD_FLAGS -mavx"]) + ]) + AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile]) AC_OUTPUT diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4 index c03ccef..95c4dbe 100644 --- a/m4/ax_ext.m4 +++ b/m4/ax_ext.m4 @@ -1,40 +1,7 @@ # -# Updated by KMG to support -DINTEL_SSE for GF-Complete +# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html +# but simplified to do compile time SIMD checks only # -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_ext.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_EXT -# -# DESCRIPTION -# 
-# Find supported SIMD extensions by requesting cpuid. When an SIMD -# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if -# compiler supports it. For example, if "sse2" is available, then "-msse2" -# is added to SIMD_FLAGS. -# -# This macro calls: -# -# AC_SUBST(SIMD_FLAGS) -# -# And defines: -# -# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX -# -# LICENSE -# -# Copyright (c) 2007 Christophe Tournayre -# Copyright (c) 2013 Michael Petch -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 12 AC_DEFUN([AX_EXT], [ @@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT], AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64]) SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64" - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext], - [ - # TODO: detect / cross-compile - ax_cv_have_neon_ext=yes - ]) - AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext], - [ - # TODO: detect / cross-compile - ax_cv_have_arm_crypt_ext=yes - ]) - - if test "$ax_cv_have_arm_crypt_ext" = yes; then - AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension]) - fi - + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes]) if test "$ax_cv_have_neon_ext" = yes; then - AC_DEFINE(HAVE_NEON,,[Support NEON instructions]) + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no]) fi - - if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto, - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", []) - elif test "$ax_cv_have_arm_crypt_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto, - 
SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", []) - elif test "$ax_cv_have_neon_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", []) - fi - ;; + ;; arm*) - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext], - [ - # TODO: detect / cross-compile - ax_cv_have_neon_ext=yes - ]) - + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes]) if test "$ax_cv_have_neon_ext" = yes; then - AC_DEFINE(HAVE_NEON,,[Support NEON instructions]) - AX_CHECK_COMPILE_FLAG(-mfpu=neon, - SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", []) + AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no]) fi - ;; + ;; powerpc*) - AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext], - [ - if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then - if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then - ax_cv_have_altivec_ext=yes - fi - fi - ]) - - if test "$ax_cv_have_altivec_ext" = yes; then - AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions]) - AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", []) - fi - ;; - - - i[[3456]]86*|x86_64*|amd64*) - - AC_REQUIRE([AX_GCC_X86_CPUID]) - AC_REQUIRE([AX_GCC_X86_AVX_XGETBV]) - - AX_GCC_X86_CPUID(0x00000001) - ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3` - edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4` - - AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext], - [ - ax_cv_have_mmx_ext=no - if test "$((0x$edx>>23&0x01))" = 1; then - ax_cv_have_mmx_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext], - [ - ax_cv_have_sse_ext=no - if test "$((0x$edx>>25&0x01))" = 1; then - ax_cv_have_sse_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext], - [ - ax_cv_have_sse2_ext=no - if test 
"$((0x$edx>>26&0x01))" = 1; then - ax_cv_have_sse2_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext], - [ - ax_cv_have_sse3_ext=no - if test "$((0x$ecx&0x01))" = 1; then - ax_cv_have_sse3_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext], - [ - ax_cv_have_pclmuldq_ext=no - if test "$((0x$ecx>>1&0x01))" = 1; then - ax_cv_have_pclmuldq_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext], - [ - ax_cv_have_ssse3_ext=no - if test "$((0x$ecx>>9&0x01))" = 1; then - ax_cv_have_ssse3_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext], - [ - ax_cv_have_sse41_ext=no - if test "$((0x$ecx>>19&0x01))" = 1; then - ax_cv_have_sse41_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext], - [ - ax_cv_have_sse42_ext=no - if test "$((0x$ecx>>20&0x01))" = 1; then - ax_cv_have_sse42_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext], - [ - ax_cv_have_avx_cpu_ext=no - if test "$((0x$ecx>>28&0x01))" = 1; then - ax_cv_have_avx_cpu_ext=yes - fi - ]) - - if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then - AX_GCC_X86_AVX_XGETBV(0x00000000) - - xgetbv_eax="0" - if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then - xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1` - fi - - AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext], - [ - ax_cv_have_avx_ext=no - - if test "$((0x$ecx>>27&0x01))" = 1; then - if test "$((0x$xgetbv_eax&0x6))" = 6; then - ax_cv_have_avx_ext=yes - fi - fi - ]) - if test x"$ax_cv_have_avx_ext" = x"no"; then - AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't]) - fi + AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes]) + if test "$ax_cv_have_altivec_ext" = yes; then + 
AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no]) fi + ;; - if test "$ax_cv_have_mmx_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, []) - if test x"$ax_cv_support_mmx_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mmmx" - AC_DEFINE(HAVE_MMX,,[Support mmx instructions]) - else - AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?]) - fi - fi + i[[3456]]86*|x86_64*|amd64*) + AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes]) if test "$ax_cv_have_sse_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, []) - if test x"$ax_cv_support_sse_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE" - AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions]) - else - AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no]) fi + AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes]) if test "$ax_cv_have_sse2_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, []) - if test x"$ax_cv_support_sse2_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2" - AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions]) - else - AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no]) fi + AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes]) if test "$ax_cv_have_sse3_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, []) - if test x"$ax_cv_support_sse3_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS 
-msse3 -DINTEL_SSE3" - AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions]) - else - AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?]) - fi - fi - - if test "$ax_cv_have_pclmuldq_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, []) - if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL" - AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication]) - else - AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no]) fi + AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes]) if test "$ax_cv_have_ssse3_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, []) - if test x"$ax_cv_support_ssse3_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3" - AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) - else - AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no]) fi - if test "$ax_cv_have_sse41_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, []) - if test x"$ax_cv_support_sse41_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4" - AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions]) - else - AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?]) - fi + AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes]) + if test "$ax_cv_have_pclmuldq_ext" = yes; 
then + AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no]) fi - if test "$ax_cv_have_sse42_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, []) - if test x"$ax_cv_support_sse42_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4" - AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions]) - else - AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?]) - fi + AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes]) + if test "$ax_cv_have_sse41_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no]) fi - if test "$ax_cv_have_avx_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, []) - if test x"$ax_cv_support_avx_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mavx" - AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions]) - else - AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?]) - fi + AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes]) + if test "$ax_cv_have_sse42_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no]) fi - - ;; + ;; esac AC_SUBST(SIMD_FLAGS) diff --git a/m4/ax_gcc_x86_avx_xgetbv.m4 b/m4/ax_gcc_x86_avx_xgetbv.m4 deleted file mode 100644 index 0624eeb..0000000 --- a/m4/ax_gcc_x86_avx_xgetbv.m4 +++ /dev/null @@ -1,79 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_X86_AVX_XGETBV -# -# DESCRIPTION -# -# On later 
x86 processors with AVX SIMD support, with gcc or a compiler -# that has a compatible syntax for inline assembly instructions, run a -# small program that executes the xgetbv instruction with input OP. This -# can be used to detect if the OS supports AVX instruction usage. -# -# On output, the values of the eax and edx registers are stored as -# hexadecimal strings as "eax:edx" in the cache variable -# ax_cv_gcc_x86_avx_xgetbv. -# -# If the xgetbv instruction fails (because you are running a -# cross-compiler, or because you are not using gcc, or because you are on -# a processor that doesn't have this instruction), -# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown". -# -# This macro mainly exists to be used in AX_EXT. -# -# LICENSE -# -# Copyright (c) 2013 Michael Petch -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. 
-# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 1 - -AC_DEFUN([AX_GCC_X86_AVX_XGETBV], -[AC_REQUIRE([AC_PROG_CC]) -AC_LANG_PUSH([C]) -AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1, - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include ], [ - int op = $1, eax, edx; - FILE *f; - /* Opcodes for xgetbv */ - __asm__(".byte 0x0f, 0x01, 0xd0" - : "=a" (eax), "=d" (edx) - : "c" (op)); - f = fopen("conftest_xgetbv", "w"); if (!f) return 1; - fprintf(f, "%x:%x\n", eax, edx); - fclose(f); - return 0; -])], - [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv], - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv], - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])]) -AC_LANG_POP([C]) -]) diff --git a/m4/ax_gcc_x86_cpuid.m4 b/m4/ax_gcc_x86_cpuid.m4 deleted file mode 100644 index 7d46fee..0000000 --- a/m4/ax_gcc_x86_cpuid.m4 +++ /dev/null @@ -1,79 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_X86_CPUID(OP) -# -# DESCRIPTION -# -# On Pentium and later x86 processors, with gcc or a compiler that has a -# compatible syntax for inline assembly instructions, run a small program -# that executes the cpuid instruction with input OP. This can be used to -# detect the CPU type. -# -# On output, the values of the eax, ebx, ecx, and edx registers are stored -# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable -# ax_cv_gcc_x86_cpuid_OP. 
-# -# If the cpuid instruction fails (because you are running a -# cross-compiler, or because you are not using gcc, or because you are on -# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP -# is set to the string "unknown". -# -# This macro mainly exists to be used in AX_GCC_ARCHFLAG. -# -# LICENSE -# -# Copyright (c) 2008 Steven G. Johnson -# Copyright (c) 2008 Matteo Frigo -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 7 - -AC_DEFUN([AX_GCC_X86_CPUID], -[AC_REQUIRE([AC_PROG_CC]) -AC_LANG_PUSH([C]) -AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1, - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include ], [ - int op = $1, eax, ebx, ecx, edx; - FILE *f; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); - f = fopen("conftest_cpuid", "w"); if (!f) return 1; - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); - fclose(f); - return 0; -])], - [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid], - [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid], - [ax_cv_gcc_x86_cpuid_$1=unknown])]) -AC_LANG_POP([C]) -]) diff --git a/src/gf_cpu.c b/src/gf_cpu.c index ee2f847..fae2cd5 100644 --- a/src/gf_cpu.c +++ b/src/gf_cpu.c @@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0; #if defined(__x86_64__) +#if defined(_MSC_VER) + +#define cpuid(info, x) __cpuidex(info, x, 0) + +#elif defined(__GNUC__) + +#include +void cpuid(int info[4], int InfoType){ + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); +} + +#else + +#error please add a way to detect CPU SIMD support at runtime + +#endif + void gf_cpu_identify(void) { if (gf_cpu_identified) { return; } - int op = 1, eax, ebx, ecx, edx; + int reg[4]; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); + cpuid(reg, 1); #if defined(INTEL_SSE4_PCLMUL) - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) { + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) { gf_cpu_supports_intel_pclmul = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_pclmul\n"); @@ -44,7 +59,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSE4) - if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) { + if (((reg[2] & (1<<20)) != 0 || (reg[2] & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) { gf_cpu_supports_intel_sse4 = 1; #ifdef DEBUG_CPU_DETECTION 
printf("#gf_cpu_supports_intel_sse4\n"); @@ -53,7 +68,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSSE3) - if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) { + if ((reg[2] & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) { gf_cpu_supports_intel_ssse3 = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_ssse3\n"); @@ -62,7 +77,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSE3) - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) { + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) { gf_cpu_supports_intel_sse3 = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_sse3\n"); @@ -71,7 +86,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSE2) - if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) { + if ((reg[3] & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) { gf_cpu_supports_intel_sse2 = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_sse2\n"); diff --git a/test/Makefile.am b/test/Makefile.am index 2791528..f590ecc 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,7 +1,7 @@ # GF-Complete 'test' AM file AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC +AM_CFLAGS = -O3 -fPIC bin_PROGRAMS = gf_unit diff --git a/tools/Makefile.am b/tools/Makefile.am index a9dd8b9..4ca9131 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,7 +1,7 @@ # GF-Complete 'tools' AM file AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC +AM_CFLAGS = -O3 -fPIC bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time diff --git a/tools/test_simd.sh b/tools/test_simd.sh index 1b0e319..6401590 100755 --- a/tools/test_simd.sh +++ b/tools/test_simd.sh @@ -27,6 +27,16 @@ test_functions() { return ${failed} } +# build with DEBUG_CPU_FUNCTIONS and print out CPU detection +test_detection() { + failed=0 + + { ./configure && make clean && 
make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> ${results}; return 1; } + { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + + return ${failed} +} + compile_arm() { failed=0 @@ -167,7 +177,7 @@ runtime_intel_flags() { { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done - echo "====SSE2 support..." >> ${1} + echo "====SSE2 support..." >> ${1} export ax_cv_have_sse_ext=no export ax_cv_have_sse2_ext=yes export ax_cv_have_sse3_ext=no diff --git a/tools/test_simd_qemu.sh b/tools/test_simd_qemu.sh index 7b2cb1c..5771874 100755 --- a/tools/test_simd_qemu.sh +++ b/tools/test_simd_qemu.sh @@ -224,6 +224,8 @@ run_test_simd_basic() { { run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } echo "=====running functions test" { run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + echo "=====running detection test" + { run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } echo "=====running runtime test" { run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } stop_qemu -- cgit v1.2.1 From 0e5c920fb69f2d962db1df045d1b71b9b012b902 Mon Sep 17 00:00:00 2001 From: Bassam Tabbara Date: Tue, 13 Sep 2016 10:19:24 -0700 Subject: gf_multby_one now checks runtime SIMD support --- src/gf.c | 128 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/src/gf.c b/src/gf.c index feeafdc..84d6996 100644 --- a/src/gf.c +++ b/src/gf.c @@ -912,9 +912,6 @@ static void gf_unaligned_xor(void *src, void *dest, int bytes); void gf_multby_one(void *src, void *dest, int bytes, int xor) { -#ifdef INTEL_SSE2 - __m128i ms, md; -#endif unsigned long uls, uld; uint8_t *s8, *d8; uint64_t *s64, *d64, *dtop64; @@ -929,84 +926,89 @@ void 
gf_multby_one(void *src, void *dest, int bytes, int xor) uld = (unsigned long) dest; #ifdef INTEL_SSE2 - int abytes; - s8 = (uint8_t *) src; - d8 = (uint8_t *) dest; - if (uls % 16 == uld % 16) { - gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16); - while (s8 != rd.s_start) { - *d8 ^= *s8; - d8++; - s8++; + if (gf_cpu_supports_intel_sse2) { + __m128i ms, md; + int abytes; + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + if (uls % 16 == uld % 16) { + gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16); + while (s8 != rd.s_start) { + *d8 ^= *s8; + d8++; + s8++; + } + while (s8 < (uint8_t *) rd.s_top) { + ms = _mm_load_si128 ((__m128i *)(s8)); + md = _mm_load_si128 ((__m128i *)(d8)); + md = _mm_xor_si128(md, ms); + _mm_store_si128((__m128i *)(d8), md); + s8 += 16; + d8 += 16; + } + while (s8 != (uint8_t *) src + bytes) { + *d8 ^= *s8; + d8++; + s8++; + } + return; } - while (s8 < (uint8_t *) rd.s_top) { - ms = _mm_load_si128 ((__m128i *)(s8)); - md = _mm_load_si128 ((__m128i *)(d8)); + + abytes = (bytes & 0xfffffff0); + + while (d8 < (uint8_t *) dest + abytes) { + ms = _mm_loadu_si128 ((__m128i *)(s8)); + md = _mm_loadu_si128 ((__m128i *)(d8)); md = _mm_xor_si128(md, ms); - _mm_store_si128((__m128i *)(d8), md); + _mm_storeu_si128((__m128i *)(d8), md); s8 += 16; d8 += 16; } - while (s8 != (uint8_t *) src + bytes) { + while (d8 != (uint8_t *) dest+bytes) { *d8 ^= *s8; d8++; s8++; } return; } - - abytes = (bytes & 0xfffffff0); - - while (d8 < (uint8_t *) dest + abytes) { - ms = _mm_loadu_si128 ((__m128i *)(s8)); - md = _mm_loadu_si128 ((__m128i *)(d8)); - md = _mm_xor_si128(md, ms); - _mm_storeu_si128((__m128i *)(d8), md); - s8 += 16; - d8 += 16; - } - while (d8 != (uint8_t *) dest+bytes) { - *d8 ^= *s8; - d8++; - s8++; - } - return; #endif #if defined(ARM_NEON) - s8 = (uint8_t *) src; - d8 = (uint8_t *) dest; - - if (uls % 16 == uld % 16) { - gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16); - while (s8 != rd.s_start) { + if 
(gf_cpu_supports_arm_neon) { + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + + if (uls % 16 == uld % 16) { + gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16); + while (s8 != rd.s_start) { + *d8 ^= *s8; + s8++; + d8++; + } + while (s8 < (uint8_t *) rd.s_top) { + uint8x16_t vs = vld1q_u8 (s8); + uint8x16_t vd = vld1q_u8 (d8); + uint8x16_t vr = veorq_u8 (vs, vd); + vst1q_u8 (d8, vr); + s8 += 16; + d8 += 16; + } + } else { + while (s8 + 15 < (uint8_t *) src + bytes) { + uint8x16_t vs = vld1q_u8 (s8); + uint8x16_t vd = vld1q_u8 (d8); + uint8x16_t vr = veorq_u8 (vs, vd); + vst1q_u8 (d8, vr); + s8 += 16; + d8 += 16; + } + } + while (s8 < (uint8_t *) src + bytes) { *d8 ^= *s8; s8++; d8++; } - while (s8 < (uint8_t *) rd.s_top) { - uint8x16_t vs = vld1q_u8 (s8); - uint8x16_t vd = vld1q_u8 (d8); - uint8x16_t vr = veorq_u8 (vs, vd); - vst1q_u8 (d8, vr); - s8 += 16; - d8 += 16; - } - } else { - while (s8 + 15 < (uint8_t *) src + bytes) { - uint8x16_t vs = vld1q_u8 (s8); - uint8x16_t vd = vld1q_u8 (d8); - uint8x16_t vr = veorq_u8 (vs, vd); - vst1q_u8 (d8, vr); - s8 += 16; - d8 += 16; - } - } - while (s8 < (uint8_t *) src + bytes) { - *d8 ^= *s8; - s8++; - d8++; + return; } - return; #endif if (uls % 8 != uld % 8) { gf_unaligned_xor(src, dest, bytes); -- cgit v1.2.1 From 0690ba86a81faff99a3383b5907ddc02a317eea0 Mon Sep 17 00:00:00 2001 From: Bassam Tabbara Date: Tue, 13 Sep 2016 11:11:40 -0700 Subject: Added --enable flags for debugging runtime SIMD --- configure.ac | 8 ++++++++ tools/test_simd.sh | 29 +++++++++++++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/configure.ac b/configure.ac index 3beea03..d696f6e 100644 --- a/configure.ac +++ b/configure.ac @@ -29,6 +29,14 @@ AC_CHECK_FUNCS([posix_memalign], AS_IF([test "x$found_memalign" != "xyes"], [AC_MSG_WARN([No function for aligned memory allocation found])]) +AC_ARG_ENABLE([debug-functions], + AS_HELP_STRING([--enable-debug-func], [Enable debugging of functions selected])) 
+AS_IF([test "x$enable_debug_func" = "xyes"], [CPPFLAGS="$CPPFLAGS -DDEBUG_FUNCTIONS"]) + +AC_ARG_ENABLE([debug-cpu], + AS_HELP_STRING([--enable-debug-cpu], [Enable debugging of SIMD detection])) +AS_IF([test "x$enable_debug_cpu" = "xyes"], [CPPFLAGS="$CPPFLAGS -DDEBUG_CPU_DETECTION"]) + AX_EXT() AC_ARG_ENABLE([neon], diff --git a/tools/test_simd.sh b/tools/test_simd.sh index 6401590..e514e4f 100755 --- a/tools/test_simd.sh +++ b/tools/test_simd.sh @@ -6,11 +6,12 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" host_cpu=`uname -p` results=${script_dir}/test_simd.results +nprocs=$(grep -c ^processor /proc/cpuinfo) # runs unit tests and save the results test_unit(){ { ./configure && make clean && make; } || { echo "Compile FAILED" >> ${results}; return 1; } - make check || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + make -j$nprocs check || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } cat tools/test-suite.log >> ${results} || true } @@ -19,7 +20,7 @@ test_unit(){ test_functions() { failed=0 - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${results}; return 1; } + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${results}; return 1; } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } done @@ -31,7 +32,7 @@ test_functions() { test_detection() { failed=0 - { ./configure && make clean && make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> ${results}; return 1; } + { ./configure --enable-debug-cpu && make clean && make; } || { echo "Compile FAILED" >> ${results}; return 1; } { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } return ${failed} @@ -133,13 +134,13 @@ runtime_arm_flags() { failed=0 echo "====NO SIMD support..." 
>> ${1} - { ./configure --disable-neon && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + { ./configure --disable-neon --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done echo "====FULL SIMD support..." >> ${1} - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done @@ -151,7 +152,7 @@ runtime_arm_flags() { runtime_arm_env() { failed=0 - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } echo "====NO SIMD support..." >> ${1} export GF_COMPLETE_DISABLE_NEON=1 @@ -172,7 +173,7 @@ runtime_intel_flags() { failed=0 echo "====NO SIMD support..." 
>> ${1} - { ./configure --disable-sse && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + { ./configure --disable-sse --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done @@ -185,7 +186,7 @@ runtime_intel_flags() { export ax_cv_have_sse41_ext=no export ax_cv_have_sse42_ext=no export ax_cv_have_pclmuldq_ext=no - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done @@ -198,7 +199,7 @@ runtime_intel_flags() { export ax_cv_have_sse41_ext=no export ax_cv_have_sse42_ext=no export ax_cv_have_pclmuldq_ext=no - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done @@ -211,7 +212,7 @@ runtime_intel_flags() { export ax_cv_have_sse41_ext=no export ax_cv_have_sse42_ext=no export ax_cv_have_pclmuldq_ext=no - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done @@ -224,7 +225,7 @@ runtime_intel_flags() { export ax_cv_have_sse41_ext=yes export ax_cv_have_sse42_ext=no export 
ax_cv_have_pclmuldq_ext=no - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done @@ -237,13 +238,13 @@ runtime_intel_flags() { export ax_cv_have_sse41_ext=no export ax_cv_have_sse42_ext=yes export ax_cv_have_pclmuldq_ext=no - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done echo "====FULL SIMD support..." >> ${1} - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "FAIL" >> ${1}; ((++failed)); } + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } for i in 128 64 32 16 8 4; do { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done @@ -255,7 +256,7 @@ runtime_intel_env() { failed=0 # compile a build with full SIMD support - { ./configure && make clean && make CFLAGS="-DDEBUG_FUNCTIONS"; } || { echo "Compile FAILED" >> ${1}; return 1; } + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } echo "====NO SIMD support..." >> ${1} export GF_COMPLETE_DISABLE_SSE2=1 -- cgit v1.2.1