From 52b5935ea1ab9a5f1043e7a4af2ced8311affe01 Mon Sep 17 00:00:00 2001
From: KIU Shueng Chuan
Date: Tue, 19 Oct 2021 14:24:51 +0800
Subject: BUG: fix win32 np.clip slowness

The use of the macro _NPY_CLIP results in multiple re-evaluations of the
input arguments. Thus for floating point types, the check of NaNs is
performed multiple times.

This manifests itself as a slowness on Win32 builds. See #18673.
---
 numpy/core/src/umath/clip.c.src | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/numpy/core/src/umath/clip.c.src b/numpy/core/src/umath/clip.c.src
index bc966b7ac..48786d4a2 100644
--- a/numpy/core/src/umath/clip.c.src
+++ b/numpy/core/src/umath/clip.c.src
@@ -76,9 +76,6 @@
  *         npy_datetime, npy_timedelta#
  */
 
-#define _NPY_CLIP(x, min, max) \
-    _NPY_@name@_MIN(_NPY_@name@_MAX((x), (min)), (max))
-
 NPY_NO_EXPORT void
 @name@_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
@@ -95,25 +92,33 @@ NPY_NO_EXPORT void
         /* contiguous, branch to let the compiler optimize */
         if (is1 == sizeof(@type@) && os1 == sizeof(@type@)) {
             for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
-                *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val);
+                @type@ t = *(@type@ *)ip1;
+                t = _NPY_@name@_MAX(t, min_val);
+                t = _NPY_@name@_MIN(t, max_val);
+                *(@type@ *)op1 = t;
             }
         }
         else {
             for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
-                *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val);
+                @type@ t = *(@type@ *)ip1;
+                t = _NPY_@name@_MAX(t, min_val);
+                t = _NPY_@name@_MIN(t, max_val);
+                *(@type@ *)op1 = t;
             }
         }
     }
     else {
         TERNARY_LOOP {
-            *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, *(@type@ *)ip2, *(@type@ *)ip3);
+            @type@ t = *(@type@ *)ip1;
+            t = _NPY_@name@_MAX(t, *(@type@ *)ip2);
+            t = _NPY_@name@_MIN(t, *(@type@ *)ip3);
+            *(@type@ *)op1 = t;
         }
     }
     npy_clear_floatstatus_barrier((char*)dimensions);
 }
 
 // clean up the macros we defined above
-#undef _NPY_CLIP
 #undef _NPY_@name@_MAX
 #undef _NPY_@name@_MIN
 
-- 
cgit v1.2.1