From 28c8e9224f887a56936cf5f18423b426704334c2 Mon Sep 17 00:00:00 2001 From: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> Date: Tue, 23 Aug 2022 12:53:48 -0700 Subject: ENH: Remove AVX related functions from non x86 based builds Apple silicon builds of NumPy have extra functions in them for AVX2/AVX512. The changes here remove those implementations if we're not building for x86. Apple silicon: - original size: 3946035 bytes - new size: 3657731 bytes - savings: 288304 bytes (7.31%) Changes pass all tests on M1 native, M1 Rosetta, and iMacPro (AVX512). We've verified performance is the same before/after for Rosetta and iMacPro. We've also verified that binaries are exactly the same size and have the same number of symbols in them. --- numpy/core/setup.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'numpy/core/setup.py') diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 65aacfdad..483e62a34 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -232,10 +232,23 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): ext.extra_compile_args.extend( ['-ffixed-xmm%s' % n for n in range(16, 32)]) - for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS: - if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, - header): - moredefs.append((fname2def(fn), 1)) + platform = sysconfig.get_platform() + if("x86_64" in platform): + for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX: + if config.check_gcc_function_attribute(dec, fn): + moredefs.append((fname2def(fn), 1)) + if fn == 'attribute_target_avx512f': + # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 + # support on Windows-based platforms + if (sys.platform in ('win32', 'cygwin') and + config.check_compiler_gcc() and + not config.check_gcc_version_at_least(8, 4)): + ext.extra_compile_args.extend( + ['-ffixed-xmm%s' % n for n in range(16, 32)]) + for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX: + if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, + header): + moredefs.append((fname2def(fn), 1)) for fn in OPTIONAL_VARIABLE_ATTRIBUTES: if config.check_gcc_variable_attribute(fn): -- cgit v1.2.1