BUG, SIMD: Fix unsigned 8-bit integer division by a scalar (issue_20025)

Same as 64-bit division, except that quad-wide division leads to an inaccurate multiplier in certain cases when the log2 of the divisor overflows to zero.
author: Sayed Adel <seiko@imavr.com> 2021-10-23 07:56:29 +0200
committer: Sayed Adel <seiko@imavr.com> 2021-10-23 08:13:42 +0200
commit: 4682087b93294a90f0e540ab0d8fd7e6e5afdd09 (patch)
tree: e05ffb1c626c742dc43d4fae334882de095e0c78
parent: 437a249250248b2cbf7acd8a2a0f836e3b98a547 (diff)
download: numpy-issue_20025.tar.gz
1 files changed, 1 insertions, 1 deletions
diff --git a/numpy/core/src/common/simd/intdiv.h b/numpy/core/src/common/simd/intdiv.h
index c787c7a7d..a7a461721 100644
--- a/numpy/core/src/common/simd/intdiv.h
+++ b/numpy/core/src/common/simd/intdiv.h
@@ -201,7 +201,7 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
     default:
         l   = npyv__bitscan_revnz_u32(d - 1) + 1;  // ceil(log2(d))
         l2  = (npy_uint8)(1 << l);                 // 2^l, overflow to 0 if l = 8
-        m   = ((l2 - d) << 8) / d + 1;             // multiplier
+        m   = ((npy_uint16)((l2 - d) << 8)) / d + 1; // multiplier
         sh1 = 1;  sh2 = l - 1;                     // shift counts
     }
     npyv_u8x3 divisor;
author	Sayed Adel <seiko@imavr.com>	2021-10-23 07:56:29 +0200
committer	Sayed Adel <seiko@imavr.com>	2021-10-23 08:13:42 +0200
commit	4682087b93294a90f0e540ab0d8fd7e6e5afdd09 (patch)
tree	e05ffb1c626c742dc43d4fae334882de095e0c78
parent	437a249250248b2cbf7acd8a2a0f836e3b98a547 (diff)
download	numpy-issue_20025.tar.gz