From 4682087b93294a90f0e540ab0d8fd7e6e5afdd09 Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Sat, 23 Oct 2021 07:56:29 +0200 Subject: BUG, SIMD: Fix unsigned 8-bit integer division by a scalar Same as 64-bit division, except quad-wide division leads to an inaccurate multiplier in certain cases when the log2 of the divisor overflows to zero. --- numpy/core/src/common/simd/intdiv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/src/common/simd/intdiv.h b/numpy/core/src/common/simd/intdiv.h index c787c7a7d..a7a461721 100644 --- a/numpy/core/src/common/simd/intdiv.h +++ b/numpy/core/src/common/simd/intdiv.h @@ -201,7 +201,7 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d) default: l = npyv__bitscan_revnz_u32(d - 1) + 1; // ceil(log2(d)) l2 = (npy_uint8)(1 << l); // 2^l, overflow to 0 if l = 8 - m = ((l2 - d) << 8) / d + 1; // multiplier + m = ((npy_uint16)((l2 - d) << 8)) / d + 1; // multiplier sh1 = 1; sh2 = l - 1; // shift counts } npyv_u8x3 divisor; -- cgit v1.2.1