From 7ddb5daa866984caa78e3fa4b5cd4869f4ee94cf Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Tue, 7 Feb 2023 21:04:27 +0200 Subject: ENH, SIMD: removes #NPY_ENABLE_AVX512_QSORT and use #directives instead --- numpy/core/setup.py | 19 ++----------------- numpy/core/src/npysort/quicksort.cpp | 11 +++++++++++ numpy/core/src/npysort/simd_qsort.dispatch.cpp | 4 ++-- numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp | 4 ++-- 4 files changed, 17 insertions(+), 21 deletions(-) (limited to 'numpy/core') diff --git a/numpy/core/setup.py b/numpy/core/setup.py index cfae34e31..d6117f02d 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -68,14 +68,6 @@ class CallOnceOnly: out = copy.deepcopy(pickle.loads(self._check_complex)) return out -# Temporarily disable AVX512 sorting on WIN32 until we can figure -# out why it has test failures -def enable_avx512_qsort(): - enable = True - if "win32" in sysconfig.get_platform(): - enable = False - return enable - def can_link_svml(): """SVML library is supported only on x86_64 architecture and currently only on linux @@ -492,9 +484,6 @@ def configuration(parent_package='',top_path=None): if can_link_svml(): moredefs.append(('NPY_CAN_LINK_SVML', 1)) - if enable_avx512_qsort(): - moredefs.append(('NPY_ENABLE_AVX512_QSORT', 1)) - # Use bogus stride debug aid to flush out bugs where users use # strides of dimensions with length 1 to index a full contiguous # array. 
@@ -975,14 +964,10 @@ def configuration(parent_package='',top_path=None): # links to the arm64 npymath library, # see gh-22673 join('src', 'npymath', 'arm64_exports.c'), + join('src', 'npysort', 'simd_qsort.dispatch.cpp'), + join('src', 'npysort', 'simd_qsort_16bit.dispatch.cpp'), ] - if enable_avx512_qsort(): - multiarray_src += [ - join('src', 'npysort', 'simd_qsort.dispatch.cpp'), - join('src', 'npysort', 'simd_qsort_16bit.dispatch.cpp'), - ] - ####################################################################### # _multiarray_umath module - umath part # ####################################################################### diff --git a/numpy/core/src/npysort/quicksort.cpp b/numpy/core/src/npysort/quicksort.cpp index 0e65dc9bc..625fdebbb 100644 --- a/numpy/core/src/npysort/quicksort.cpp +++ b/numpy/core/src/npysort/quicksort.cpp @@ -69,6 +69,15 @@ #define SMALL_MERGESORT 20 #define SMALL_STRING 16 +// Temporarily disable AVX512 sorting on WIN32 until we can figure +// out why it has test failures +#ifdef _MSC_VER +template <typename T> +inline bool quicksort_dispatch(T*, npy_intp) +{ + return false; +} +#else template <typename T> inline bool quicksort_dispatch(T *start, npy_intp num) { @@ -92,6 +101,8 @@ inline bool quicksort_dispatch(T *start, npy_intp num) } return false; } +#endif // _MSC_VER + /* ***************************************************************************** ** NUMERIC SORTS ** diff --git a/numpy/core/src/npysort/simd_qsort.dispatch.cpp b/numpy/core/src/npysort/simd_qsort.dispatch.cpp index 36b5d799c..c2ac5a2ae 100644 --- a/numpy/core/src/npysort/simd_qsort.dispatch.cpp +++ b/numpy/core/src/npysort/simd_qsort.dispatch.cpp @@ -7,14 +7,14 @@ #include "simd_qsort.hpp" -#ifdef NPY_HAVE_AVX512_SKX +#if defined(NPY_HAVE_AVX512_SKX) && !defined(_MSC_VER) #include "avx512-32bit-qsort.hpp" #include "avx512-64bit-qsort.hpp" #endif namespace np { namespace qsort_simd { -#ifdef NPY_HAVE_AVX512_SKX +#if defined(NPY_HAVE_AVX512_SKX) && !defined(_MSC_VER) template<> void 
NPY_CPU_DISPATCH_CURFX(QSort)(int32_t *arr, intptr_t size) { avx512_qsort(arr, size); diff --git a/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp b/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp index a816b8781..673a2f81e 100644 --- a/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp +++ b/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp @@ -7,13 +7,13 @@ #include "simd_qsort.hpp" -#ifdef NPY_HAVE_AVX512_ICL +#if defined(NPY_HAVE_AVX512_ICL) && !defined(_MSC_VER) #include "avx512-16bit-qsort.hpp" #endif namespace np { namespace qsort_simd { -#ifdef NPY_HAVE_AVX512_ICL +#if defined(NPY_HAVE_AVX512_ICL) && !defined(_MSC_VER) template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, intptr_t size) { avx512_qsort_fp16(reinterpret_cast<uint16_t*>(arr), size); -- cgit v1.2.1