summaryrefslogtreecommitdiff
path: root/numpy/core/src/npysort/quicksort.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/core/src/npysort/quicksort.cpp')
-rw-r--r--numpy/core/src/npysort/quicksort.cpp127
1 files changed, 68 insertions, 59 deletions
diff --git a/numpy/core/src/npysort/quicksort.cpp b/numpy/core/src/npysort/quicksort.cpp
index 3e351dd84..7497ebaa3 100644
--- a/numpy/core/src/npysort/quicksort.cpp
+++ b/numpy/core/src/npysort/quicksort.cpp
@@ -54,15 +54,11 @@
#include "npysort_common.h"
#include "npysort_heapsort.h"
#include "numpy_tag.h"
+#include "simd_qsort.hpp"
-#include "x86-qsort.h"
#include <cstdlib>
#include <utility>
-#ifndef NPY_DISABLE_OPTIMIZATION
-#include "x86-qsort.dispatch.h"
-#endif
-
#define NOT_USED NPY_UNUSED(unused)
/*
* pushing largest partition has upper bound of log2(n) space
@@ -73,70 +69,50 @@
#define SMALL_MERGESORT 20
#define SMALL_STRING 16
+// Temporarily disable AVX512 sorting on WIN32 and CYGWIN until we can figure
+// out why it has test failures
+#if defined(_MSC_VER) || defined(__CYGWIN__)
+template<typename T>
+inline bool quicksort_dispatch(T*, npy_intp)
+{
+ return false;
+}
+#else
+template<typename T>
+inline bool quicksort_dispatch(T *start, npy_intp num)
+{
+ using TF = typename np::meta::FixedWidth<T>::Type;
+ void (*dispfunc)(TF*, intptr_t) = nullptr;
+ if (sizeof(T) == sizeof(uint16_t)) {
+ #ifndef NPY_DISABLE_OPTIMIZATION
+ #include "simd_qsort_16bit.dispatch.h"
+ #endif
+ NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
+ }
+ else if (sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t)) {
+ #ifndef NPY_DISABLE_OPTIMIZATION
+ #include "simd_qsort.dispatch.h"
+ #endif
+ NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
+ }
+ if (dispfunc) {
+ (*dispfunc)(reinterpret_cast<TF*>(start), static_cast<intptr_t>(num));
+ return true;
+ }
+ return false;
+}
+#endif // _MSC_VER || CYGWIN
+
/*
*****************************************************************************
** NUMERIC SORTS **
*****************************************************************************
*/
-namespace {
-
-template <typename Tag>
-struct x86_dispatch {
- static bool quicksort(typename Tag::type *, npy_intp) { return false; }
-};
-
-template <>
-struct x86_dispatch<npy::int_tag> {
- static bool quicksort(npy_int *start, npy_intp num)
- {
- void (*dispfunc)(void *, npy_intp) = nullptr;
- NPY_CPU_DISPATCH_CALL_XB(dispfunc = x86_quicksort_int);
- if (dispfunc) {
- (*dispfunc)(start, num);
- return true;
- }
- return false;
- }
-};
-
-template <>
-struct x86_dispatch<npy::uint_tag> {
- static bool quicksort(npy_uint *start, npy_intp num)
- {
- void (*dispfunc)(void *, npy_intp) = nullptr;
- NPY_CPU_DISPATCH_CALL_XB(dispfunc = x86_quicksort_uint);
- if (dispfunc) {
- (*dispfunc)(start, num);
- return true;
- }
- return false;
- }
-};
-
-template <>
-struct x86_dispatch<npy::float_tag> {
- static bool quicksort(npy_float *start, npy_intp num)
- {
- void (*dispfunc)(void *, npy_intp) = nullptr;
- NPY_CPU_DISPATCH_CALL_XB(dispfunc = x86_quicksort_float);
- if (dispfunc) {
- (*dispfunc)(start, num);
- return true;
- }
- return false;
- }
-};
-
-} // namespace
-
template <typename Tag, typename type>
static int
quicksort_(type *start, npy_intp num)
{
- if (x86_dispatch<Tag>::quicksort(start, num))
- return 0;
-
type vp;
type *pl = start;
type *pr = pl + num - 1;
@@ -729,56 +705,89 @@ quicksort_ubyte(void *start, npy_intp n, void *NPY_UNUSED(varr))
NPY_NO_EXPORT int
quicksort_short(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_short *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::short_tag>((npy_short *)start, n);
}
NPY_NO_EXPORT int
quicksort_ushort(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_ushort *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::ushort_tag>((npy_ushort *)start, n);
}
NPY_NO_EXPORT int
quicksort_int(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_int *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::int_tag>((npy_int *)start, n);
}
NPY_NO_EXPORT int
quicksort_uint(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_uint *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::uint_tag>((npy_uint *)start, n);
}
NPY_NO_EXPORT int
quicksort_long(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_long *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::long_tag>((npy_long *)start, n);
}
NPY_NO_EXPORT int
quicksort_ulong(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_ulong *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::ulong_tag>((npy_ulong *)start, n);
}
NPY_NO_EXPORT int
quicksort_longlong(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_longlong *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::longlong_tag>((npy_longlong *)start, n);
}
NPY_NO_EXPORT int
quicksort_ulonglong(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_ulonglong *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::ulonglong_tag>((npy_ulonglong *)start, n);
}
NPY_NO_EXPORT int
quicksort_half(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((np::Half *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::half_tag>((npy_half *)start, n);
}
NPY_NO_EXPORT int
quicksort_float(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_float *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::float_tag>((npy_float *)start, n);
}
NPY_NO_EXPORT int
quicksort_double(void *start, npy_intp n, void *NPY_UNUSED(varr))
{
+ if (quicksort_dispatch((npy_double *)start, n)) {
+ return 0;
+ }
return quicksort_<npy::double_tag>((npy_double *)start, n);
}
NPY_NO_EXPORT int