summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSayed Adel <seiko@imavr.com>2021-01-16 14:01:56 +0000
committerSayed Adel <seiko@imavr.com>2021-03-08 08:19:12 +0200
commit6c94b4c2c7d48acf08a0f4d2d5844f7d7f3669de (patch)
tree127f496944c2319401e84f084294af8531c008ef
parent2da9858aed5efca22d92682000f03e341dc46d64 (diff)
downloadnumpy-6c94b4c2c7d48acf08a0f4d2d5844f7d7f3669de.tar.gz
SIMD, TST: add test cases for NPYV fast integer division
-rw-r--r--numpy/core/src/_simd/_simd.dispatch.c.src19
-rw-r--r--numpy/core/tests/test_simd.py70
2 files changed, 86 insertions, 3 deletions
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index e5b58a8d2..4acd20a75 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -15,8 +15,8 @@
/**begin repeat
* #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
* #bsfx = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64#
- * #esfx = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64#
- * #expand_sup =1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
+ * #esfx = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64#
+ * #expand_sup= 1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
* #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
* #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
* #sat_sup = 1, 1, 1, 1, 0, 0, 0, 0, 0, 0#
@@ -27,6 +27,7 @@
* #sum_sup = 0, 0, 0, 0, 1, 0, 1, 0, 1, 1#
* #rev64_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 0#
* #ncont_sup = 0, 0, 0, 0, 1, 1, 1, 1, 1, 1#
+ * #intdiv_sup= 1, 1, 1, 1, 1, 1, 1, 1, 0, 0#
* #shl_imm = 0, 0, 15, 15, 31, 31, 63, 63, 0, 0#
* #shr_imm = 0, 0, 16, 16, 32, 32, 64, 64, 0, 0#
*/
@@ -354,6 +355,11 @@ SIMD_IMPL_INTRIN_2(mul_@sfx@, v@sfx@, v@sfx@, v@sfx@)
SIMD_IMPL_INTRIN_2(div_@sfx@, v@sfx@, v@sfx@, v@sfx@)
#endif // div_sup
+#if @intdiv_sup@
+SIMD_IMPL_INTRIN_1(divisor_@sfx@, v@sfx@x3, @sfx@)
+SIMD_IMPL_INTRIN_2(divc_@sfx@, v@sfx@, v@sfx@, v@sfx@x3)
+#endif // intdiv_sup
+
#if @fused_sup@
/**begin repeat1
* #intrin = muladd, mulsub, nmuladd, nmulsub#
@@ -442,6 +448,7 @@ SIMD_IMPL_INTRIN_1(not_@bsfx@, v@bsfx@, v@bsfx@)
SIMD_IMPL_INTRIN_1(tobits_@bsfx@, u64, v@bsfx@)
/**end repeat**/
+
//#########################################################################
//## Attach module functions
//#########################################################################
@@ -449,7 +456,7 @@ static PyMethodDef simd__intrinsics_methods[] = {
/**begin repeat
* #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
* #bsfx = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64#
- * #esfx = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64#
+ * #esfx = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64#
* #expand_sup =1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
* #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
* #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
@@ -461,6 +468,7 @@ static PyMethodDef simd__intrinsics_methods[] = {
* #sum_sup = 0, 0, 0, 0, 1, 0, 1, 0, 1, 1#
* #rev64_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 0#
* #ncont_sup = 0, 0, 0, 0, 1, 1, 1, 1, 1, 1#
+ * #intdiv_sup= 1, 1, 1, 1, 1, 1, 1, 1, 0, 0#
* #shl_imm = 0, 0, 15, 15, 31, 31, 63, 63, 0, 0#
* #shr_imm = 0, 0, 16, 16, 32, 32, 64, 64, 0, 0#
*/
@@ -568,6 +576,11 @@ SIMD_INTRIN_DEF(mul_@sfx@)
SIMD_INTRIN_DEF(div_@sfx@)
#endif // div_sup
+#if @intdiv_sup@
+SIMD_INTRIN_DEF(divisor_@sfx@)
+SIMD_INTRIN_DEF(divc_@sfx@)
+#endif // intdiv_sup
+
#if @fused_sup@
/**begin repeat1
* #intrin = muladd, mulsub, nmuladd, nmulsub#
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index 1d1a111be..8306a86d3 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -733,6 +733,76 @@ class _SIMD_ALL(_Test_Utility):
div = self.div(vdata_a, vdata_b)
assert div == data_div
+ def test_arithmetic_intdiv(self):
+ """
+ Test integer division intrinsics:
+ npyv_divisor_##sfx
+ npyv_divc_##sfx
+ """
+ if self._is_fp():
+ return
+
+ def trunc_div(a, d):
+ """
+ Divide rounding towards zero. Works with large integers > 2^53
+ and is equivalent to int(a/d).
+ """
+ sign_a, sign_d = a < 0, d < 0
+ if a == 0 or sign_a == sign_d:
+ return a // d
+ return (a + sign_d - sign_a) // d + 1
+
+ int_min = self._int_min() if self._is_signed() else 1
+ int_max = self._int_max()
+ rdata = (
+ 0, 1, self.nlanes, int_max-self.nlanes,
+ int_min, int_min//2 + 1
+ )
+ divisors = (1, 2, self.nlanes, int_min, int_max, int_max//2)
+
+ for x, d in zip(rdata, divisors):
+ data = self._data(x)
+ vdata = self.load(data)
+ data_divc = [trunc_div(a, d) for a in data]
+ divisor = self.divisor(d)
+ divc = self.divc(vdata, divisor)
+ assert divc == data_divc
+
+ if not self._is_signed():
+ return
+
+ safe_neg = lambda x: -x-1 if -x > int_max else -x
+ # test round division for signed integers
+ for x, d in zip(rdata, divisors):
+ d_neg = safe_neg(d)
+ data = self._data(x)
+ data_neg = [safe_neg(a) for a in data]
+ vdata = self.load(data)
+ vdata_neg = self.load(data_neg)
+ divisor = self.divisor(d)
+ divisor_neg = self.divisor(d_neg)
+
+ # round towards zero
+ data_divc = [trunc_div(a, d_neg) for a in data]
+ divc = self.divc(vdata, divisor_neg)
+ assert divc == data_divc
+ data_divc = [trunc_div(a, d) for a in data_neg]
+ divc = self.divc(vdata_neg, divisor)
+ assert divc == data_divc
+
+ # test truncate sign if the dividend is zero
+ vzero = self.zero()
+ for d in (-1, -10, -100, int_min//2, int_min):
+ divisor = self.divisor(d)
+ divc = self.divc(vzero, divisor)
+ assert divc == vzero
+
+ # test overflow
+ vmin = self.setall(int_min)
+ divisor = self.divisor(-1)
+ divc = self.divc(vmin, divisor)
+ assert divc == vmin
+
def test_arithmetic_reduce_sum(self):
"""
Test reduce sum intrinics: