From 4682087b93294a90f0e540ab0d8fd7e6e5afdd09 Mon Sep 17 00:00:00 2001
From: Sayed Adel <seiko@imavr.com>
Date: Sat, 23 Oct 2021 07:56:29 +0200
Subject: BUG, SIMD: Fix unsigned 8-bit integer division by a scalar

  Same as 64-bit division, except quad-wide division leads
  to an inaccurate multiplier in certain cases when the log2 of
  the divisor overflows to zero.
---
 numpy/core/src/common/simd/intdiv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/core/src/common/simd/intdiv.h b/numpy/core/src/common/simd/intdiv.h
index c787c7a7d..a7a461721 100644
--- a/numpy/core/src/common/simd/intdiv.h
+++ b/numpy/core/src/common/simd/intdiv.h
@@ -201,7 +201,7 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
     default:
         l   = npyv__bitscan_revnz_u32(d - 1) + 1;  // ceil(log2(d))
         l2  = (npy_uint8)(1 << l);                 // 2^l, overflow to 0 if l = 8
-        m   = ((l2 - d) << 8) / d + 1;             // multiplier
+        m   = ((npy_uint16)((l2 - d) << 8)) / d + 1; // multiplier
         sh1 = 1;  sh2 = l - 1;                     // shift counts
     }
     npyv_u8x3 divisor;
-- 
cgit v1.2.1