From 28c8e9224f887a56936cf5f18423b426704334c2 Mon Sep 17 00:00:00 2001 From: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> Date: Tue, 23 Aug 2022 12:53:48 -0700 Subject: ENH: Remove AVX related functions from non x86 based builds Apple silicon builds of NumPy have extra functions in them for AVX2/AVX512. The changes here remove those implementations if we're not building for x86. Apple silicon: - original size: 3946035 bytes - new size: 3657731 bytes - savings: 288304 bytes (7.31%) Changes pass all tests on M1 native, M1 Rosetta, and iMacPro (AVX512). We've verified performance is the same before/after for Rosetta and iMacPro. We've also verified that binaries are exactly the same size and have the same number of symbols in them. --- numpy/core/setup_common.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'numpy/core/setup_common.py') diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index c19a8479f..23eda03db 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -204,16 +204,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', 'attribute_optimize_opt_2'), ('__attribute__((nonnull (1)))', 'attribute_nonnull'), - ('__attribute__((target ("avx")))', - 'attribute_target_avx'), - ('__attribute__((target ("avx2")))', - 'attribute_target_avx2'), - ('__attribute__((target ("avx512f")))', - 'attribute_target_avx512f'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx'), ] +OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))', + 'attribute_target_avx'), + ('__attribute__((target ("avx2")))', + 'attribute_target_avx2'), + ('__attribute__((target ("avx512f")))', + 'attribute_target_avx512f'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx'), + ] + # function attributes with intrinsics # To ensure your compiler can compile avx intrinsics with just the attributes # gcc 4.8.4 support attributes but not with intrisics @@ -222,7 +224,7 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', # The _mm512_castps_si512 instruction is specific check for AVX-512F support # in gcc-4.9 which is missing a subset of intrinsics. See # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))', +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [('__attribute__((target("avx2,fma")))', 'attribute_target_avx2_with_intrinsics', '__m256 temp = _mm256_set1_ps(1.0); temp = \ _mm256_fmadd_ps(temp, temp, temp)', -- cgit v1.2.1