summaryrefslogtreecommitdiff
path: root/numpy/core/setup_common.py
diff options
context:
space:
mode:
authorRaghuveer Devulapalli <raghuveer.devulapalli@intel.com>2019-05-22 16:27:47 -0700
committerRaghuveer Devulapalli <raghuveer.devulapalli@intel.com>2019-08-03 10:50:04 -0700
commitcd9f1a87d6f200b3ccb4be7dc646b4832b321ba9 (patch)
tree12a94f1658b7a899f2c60dac62dad3ba38350a13 /numpy/core/setup_common.py
parent9cae09cfa46dcb8d4eed07f7df841a36da942b07 (diff)
downloadnumpy-cd9f1a87d6f200b3ccb4be7dc646b4832b321ba9.tar.gz
BUG: AVX2 impl of sin/cos requires an FMA
Without an FMA, the output of AVX2 and AVX512 version differ. This changes ensures the output across implementations remains exactly the same.
Diffstat (limited to 'numpy/core/setup_common.py')
-rw-r--r--numpy/core/setup_common.py5
1 files changed, 3 insertions, 2 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 6e3109ab5..a3f7acd6d 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -178,9 +178,10 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
# gcc 4.8.4 support attributes but not with intrisics
# tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code)
# function name will be converted to HAVE_<upper-case-name> preprocessor macro
-OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2")))',
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))',
'attribute_target_avx2_with_intrinsics',
- '__m256 temp = _mm256_set1_ps(1.0)',
+ '__m256 temp = _mm256_set1_ps(1.0); temp = \
+ _mm256_fmadd_ps(temp, temp, temp)',
'immintrin.h'),
('__attribute__((target("avx512f")))',
'attribute_target_avx512f_with_intrinsics',