diff options
author | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-05-22 16:27:47 -0700 |
---|---|---|
committer | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-08-03 10:50:04 -0700 |
commit | cd9f1a87d6f200b3ccb4be7dc646b4832b321ba9 (patch) | |
tree | 12a94f1658b7a899f2c60dac62dad3ba38350a13 /numpy/core/setup_common.py | |
parent | 9cae09cfa46dcb8d4eed07f7df841a36da942b07 (diff) | |
download | numpy-cd9f1a87d6f200b3ccb4be7dc646b4832b321ba9.tar.gz |
BUG: AVX2 impl of sin/cos requires an FMA
Without an FMA, the outputs of the AVX2 and AVX512 versions differ. This
change ensures the output across implementations remains exactly the
same.
Diffstat (limited to 'numpy/core/setup_common.py')
-rw-r--r-- | numpy/core/setup_common.py | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 6e3109ab5..a3f7acd6d 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -178,9 +178,10 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', # gcc 4.8.4 support attributes but not with intrisics # tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code) # function name will be converted to HAVE_<upper-case-name> preprocessor macro -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2")))', +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))', 'attribute_target_avx2_with_intrinsics', - '__m256 temp = _mm256_set1_ps(1.0)', + '__m256 temp = _mm256_set1_ps(1.0); temp = \ + _mm256_fmadd_ps(temp, temp, temp)', 'immintrin.h'), ('__attribute__((target("avx512f")))', 'attribute_target_avx512f_with_intrinsics', |