From cd9f1a87d6f200b3ccb4be7dc646b4832b321ba9 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 22 May 2019 16:27:47 -0700 Subject: BUG: AVX2 impl of sin/cos requires an FMA Without an FMA, the outputs of the AVX2 and AVX512 versions differ. This change ensures the output across implementations remains exactly the same. --- numpy/core/setup_common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'numpy/core/setup_common.py') diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 6e3109ab5..a3f7acd6d 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -178,9 +178,10 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', # gcc 4.8.4 support attributes but not with intrisics # tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code) # function name will be converted to HAVE_ preprocessor macro -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2")))', +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))', 'attribute_target_avx2_with_intrinsics', - '__m256 temp = _mm256_set1_ps(1.0)', + '__m256 temp = _mm256_set1_ps(1.0); temp = \ + _mm256_fmadd_ps(temp, temp, temp)', 'immintrin.h'), ('__attribute__((target("avx512f")))', 'attribute_target_avx512f_with_intrinsics', -- cgit v1.2.1