diff options
author | Radu Velea <radu.velea@intel.com> | 2015-10-27 12:21:36 +0200 |
---|---|---|
committer | Timothy B. Terriberry <tterribe@xiph.org> | 2015-11-05 01:23:05 -0800 |
commit | 1632152b83b8ab4e28393bca94450796b71b0201 (patch) | |
tree | 9d70a379010a47f5c203bad841dca40257619e14 | |
parent | bb0e1e0d6f6b112160a10de84ba368689c41c1bb (diff) | |
download | opus-1632152b83b8ab4e28393bca94450796b71b0201.tar.gz |
Adding AVX config switches
Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
-rw-r--r-- | celt/cpu_support.h | 5 | ||||
-rw-r--r-- | celt/x86/x86_celt_map.c | 7 | ||||
-rw-r--r-- | celt/x86/x86cpu.c | 10 | ||||
-rw-r--r-- | configure.ac | 38 | ||||
-rw-r--r-- | silk/x86/x86_silk_map.c | 7 |
5 files changed, 64 insertions, 3 deletions
diff --git a/celt/cpu_support.h b/celt/cpu_support.h index db1cb588..133abbfc 100644 --- a/celt/cpu_support.h +++ b/celt/cpu_support.h @@ -48,13 +48,14 @@ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) #include "x86/x86cpu.h" -/* We currently support 4 x86 variants: +/* We currently support 5 x86 variants: * arch[0] -> non-sse * arch[1] -> sse * arch[2] -> sse2 * arch[3] -> sse4.1 + * arch[4] -> avx */ -#define OPUS_ARCHMASK 3 +#define OPUS_ARCHMASK 7 int opus_select_arch(void); #else diff --git a/celt/x86/x86_celt_map.c b/celt/x86/x86_celt_map.c index 1ed2acbc..8e5e4492 100644 --- a/celt/x86/x86_celt_map.c +++ b/celt/x86/x86_celt_map.c @@ -53,6 +53,7 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( celt_fir_c, celt_fir_c, MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */ + MAY_HAVE_SSE4_1(celt_fir) /* avx */ }; void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( @@ -65,6 +66,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( xcorr_kernel_c, xcorr_kernel_c, MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */ + MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */ }; #endif @@ -81,6 +83,7 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( celt_inner_prod_c, MAY_HAVE_SSE2(celt_inner_prod), MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1 */ + MAY_HAVE_SSE4_1(celt_inner_prod) /* avx */ }; #endif @@ -99,6 +102,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( MAY_HAVE_SSE(xcorr_kernel), MAY_HAVE_SSE(xcorr_kernel), MAY_HAVE_SSE(xcorr_kernel), + MAY_HAVE_SSE(xcorr_kernel) }; opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( @@ -110,6 +114,7 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( MAY_HAVE_SSE(celt_inner_prod), MAY_HAVE_SSE(celt_inner_prod), MAY_HAVE_SSE(celt_inner_prod), + MAY_HAVE_SSE(celt_inner_prod) }; void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( @@ -124,6 +129,7 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( MAY_HAVE_SSE(dual_inner_prod), MAY_HAVE_SSE(dual_inner_prod), MAY_HAVE_SSE(dual_inner_prod), + MAY_HAVE_SSE(dual_inner_prod) }; void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( @@ -139,6 +145,7 @@ void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( MAY_HAVE_SSE(comb_filter_const), MAY_HAVE_SSE(comb_filter_const), MAY_HAVE_SSE(comb_filter_const), + MAY_HAVE_SSE(comb_filter_const) }; diff --git a/celt/x86/x86cpu.c b/celt/x86/x86cpu.c index f850715e..1a73dd1f 100644 --- a/celt/x86/x86cpu.c +++ b/celt/x86/x86cpu.c @@ -91,6 +91,8 @@ typedef struct CPU_Feature{ int HW_SSE; int HW_SSE2; int HW_SSE41; + /* SIMD: 256-bit */ + int HW_AVX; } CPU_Feature; static void opus_cpu_feature_check(CPU_Feature *cpu_feature) @@ -106,11 +108,13 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature) cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; + cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; } else { cpu_feature->HW_SSE = 0; cpu_feature->HW_SSE2 = 0; cpu_feature->HW_SSE41 = 0; + cpu_feature->HW_AVX = 0; } } @@ -140,6 +144,12 @@ int opus_select_arch(void) } arch++; + if (!cpu_feature.HW_AVX) + { + return arch; + } + arch++; + return arch; } diff --git a/configure.ac b/configure.ac index bb838c0b..74aa2f48 100644 --- a/configure.ac +++ b/configure.ac @@ -351,10 +351,12 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM], AM_CONDITIONAL([HAVE_SSE], [false]) AM_CONDITIONAL([HAVE_SSE2], [false]) AM_CONDITIONAL([HAVE_SSE4_1], [false]) +AM_CONDITIONAL([HAVE_AVX], [false]) m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse]) m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2]) m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1]) +m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx]) m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon]) # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify # -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu), @@ -371,11 +373,13 @@ AS_CASE([$host], AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@]) +AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@]) AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@]) AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")]) AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")]) AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")]) +AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")]) AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])]) AC_DEFUN([OPUS_PATH_NE10], @@ -566,7 +570,24 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ AC_SUBST([OPUS_X86_SSE4_1_CFLAGS]) ] ) - + OPUS_CHECK_INTRINSICS( + [AVX], + [$X86_AVX_CFLAGS], + [OPUS_X86_MAY_HAVE_AVX], + [OPUS_X86_PRESUME_AVX], + [[#include <immintrin.h> + ]], + [[ + static __m256 mtest; + mtest = _mm256_setzero_ps(); + ]] + ) + AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"], + [ + OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS" + AC_SUBST([OPUS_X86_AVX_CFLAGS]) + ] + ) AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""]) AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1"], [ @@ -606,6 +627,19 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ [ AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics]) ]) + AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"], + [ + AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics]) + intrinsics_support="$intrinsics_support AVX" + + AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"], + [AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])], + [rtcd_support="$rtcd_support AVX"]) + ], + [ + AC_MSG_WARN([Compiler does not support AVX intrinsics]) + ]) + AS_IF([test x"$intrinsics_support" = x""], [intrinsics_support=no], [intrinsics_support="x86$intrinsics_support"] @@ -672,6 +706,8 @@ AM_CONDITIONAL([HAVE_SSE2], [test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"]) AM_CONDITIONAL([HAVE_SSE4_1], [test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"]) +AM_CONDITIONAL([HAVE_AVX], + [test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"]) AS_IF([test x"$enable_rtcd" = x"yes"],[ AS_IF([test x"$rtcd_support" != x"no"],[ diff --git a/silk/x86/x86_silk_map.c b/silk/x86/x86_silk_map.c index 6e79675a..818841f2 100644 --- a/silk/x86/x86_silk_map.c +++ b/silk/x86/x86_silk_map.c @@ -50,6 +50,7 @@ opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )( silk_inner_prod16_aligned_64_c, silk_inner_prod16_aligned_64_c, MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */ }; #endif @@ -62,6 +63,7 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( silk_VAD_GetSA_Q8_c, silk_VAD_GetSA_Q8_c, MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ }; void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -85,6 +87,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( silk_NSQ_c, silk_NSQ_c, MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ }; void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -104,6 +107,7 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( silk_VQ_WMat_EC_c, silk_VQ_WMat_EC_c, MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ }; void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -127,6 +131,7 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( silk_NSQ_del_dec_c, silk_NSQ_del_dec_c, MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ }; #if defined(FIXED_POINT) @@ -144,6 +149,7 @@ void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )( silk_warped_LPC_analysis_filter_FIX_c, silk_warped_LPC_analysis_filter_FIX_c, MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */ }; void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -161,6 +167,7 @@ void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( silk_burg_modified_c, silk_burg_modified_c, MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_burg_modified ) /* avx */ }; #endif |