summary | refs | log | tree | commit | diff
path: root/numpy/core/setup_common.py
diff options
context:
space:
mode:
author: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> 2022-08-23 12:53:48 -0700
committer: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> 2022-08-23 12:53:48 -0700
commit: 28c8e9224f887a56936cf5f18423b426704334c2 (patch)
tree: 01eb937a32823bf2786182171a2aef2aeec2022e /numpy/core/setup_common.py
parent: d8c09c50ef2e90f0db7395e70d2d8fa11921abc5 (diff)
download: numpy-28c8e9224f887a56936cf5f18423b426704334c2.tar.gz
ENH: Remove AVX related functions from non x86 based builds
Apple silicon builds of NumPy have extra functions in them for AVX2/AVX512. The changes here remove those implementations if we're not building for x86.

Apple silicon:
- original size: 3946035 bytes
- new size: 3657731 bytes
- savings: 288304 bytes (7.31%)

Changes pass all tests on M1 native, M1 Rosetta, and iMacPro (AVX512). We've verified performance is the same before/after for Rosetta and iMacPro. We've also verified that binaries are exactly the same size and have the same number of symbols in them.
Diffstat (limited to 'numpy/core/setup_common.py')
-rw-r--r-- numpy/core/setup_common.py 20
1 files changed, 11 insertions, 9 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index c19a8479f..23eda03db 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -204,16 +204,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
'attribute_optimize_opt_2'),
('__attribute__((nonnull (1)))',
'attribute_nonnull'),
- ('__attribute__((target ("avx")))',
- 'attribute_target_avx'),
- ('__attribute__((target ("avx2")))',
- 'attribute_target_avx2'),
- ('__attribute__((target ("avx512f")))',
- 'attribute_target_avx512f'),
- ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
- 'attribute_target_avx512_skx'),
]
+OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))',
+ 'attribute_target_avx'),
+ ('__attribute__((target ("avx2")))',
+ 'attribute_target_avx2'),
+ ('__attribute__((target ("avx512f")))',
+ 'attribute_target_avx512f'),
+ ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+ 'attribute_target_avx512_skx'),
+ ]
+
# function attributes with intrinsics
# To ensure your compiler can compile avx intrinsics with just the attributes
# gcc 4.8.4 support attributes but not with intrisics
@@ -222,7 +224,7 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
# The _mm512_castps_si512 instruction is specific check for AVX-512F support
# in gcc-4.9 which is missing a subset of intrinsics. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
-OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))',
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [('__attribute__((target("avx2,fma")))',
'attribute_target_avx2_with_intrinsics',
'__m256 temp = _mm256_set1_ps(1.0); temp = \
_mm256_fmadd_ps(temp, temp, temp)',