diff options
author | Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> | 2022-08-23 12:53:48 -0700 |
---|---|---|
committer | Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> | 2022-08-23 12:53:48 -0700 |
commit | 28c8e9224f887a56936cf5f18423b426704334c2 (patch) | |
tree | 01eb937a32823bf2786182171a2aef2aeec2022e /numpy/core/setup_common.py | |
parent | d8c09c50ef2e90f0db7395e70d2d8fa11921abc5 (diff) | |
download | numpy-28c8e9224f887a56936cf5f18423b426704334c2.tar.gz |
ENH: Remove AVX related functions from non x86 based builds
Apple silicon builds of NumPy have extra functions in them for AVX2/AVX512. The changes here remove those implementations if we're not building for x86.
Apple silicon:
- original size: 3946035 bytes
- new size: 3657731 bytes
- savings: 288304 bytes (7.31%)
Changes pass all tests on M1 native, M1 Rosetta, and iMacPro (AVX512). We've verified performance is the same before/after for Rosetta and iMacPro. We've also verified that the x86 binaries are exactly the same size and have the same number of symbols in them as before the change.
Diffstat (limited to 'numpy/core/setup_common.py')
-rw-r--r-- | numpy/core/setup_common.py | 20 |
1 file changed, 11 insertions, 9 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index c19a8479f..23eda03db 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -204,16 +204,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', 'attribute_optimize_opt_2'), ('__attribute__((nonnull (1)))', 'attribute_nonnull'), - ('__attribute__((target ("avx")))', - 'attribute_target_avx'), - ('__attribute__((target ("avx2")))', - 'attribute_target_avx2'), - ('__attribute__((target ("avx512f")))', - 'attribute_target_avx512f'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx'), ] +OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))', + 'attribute_target_avx'), + ('__attribute__((target ("avx2")))', + 'attribute_target_avx2'), + ('__attribute__((target ("avx512f")))', + 'attribute_target_avx512f'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx'), + ] + # function attributes with intrinsics # To ensure your compiler can compile avx intrinsics with just the attributes # gcc 4.8.4 support attributes but not with intrisics @@ -222,7 +224,7 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', # The _mm512_castps_si512 instruction is specific check for AVX-512F support # in gcc-4.9 which is missing a subset of intrinsics. See # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))', +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [('__attribute__((target("avx2,fma")))', 'attribute_target_avx2_with_intrinsics', '__m256 temp = _mm256_set1_ps(1.0); temp = \ _mm256_fmadd_ps(temp, temp, temp)', |