diff options
author | Sayed Adel <seiko@imavr.com> | 2021-01-16 14:01:56 +0000 |
---|---|---|
committer | Sayed Adel <seiko@imavr.com> | 2021-03-08 08:19:12 +0200 |
commit | 6c94b4c2c7d48acf08a0f4d2d5844f7d7f3669de (patch) | |
tree | 127f496944c2319401e84f084294af8531c008ef | |
parent | 2da9858aed5efca22d92682000f03e341dc46d64 (diff) | |
download | numpy-6c94b4c2c7d48acf08a0f4d2d5844f7d7f3669de.tar.gz |
SIMD, TST: add test cases for NPYV fast integer division
-rw-r--r-- | numpy/core/src/_simd/_simd.dispatch.c.src | 19 | ||||
-rw-r--r-- | numpy/core/tests/test_simd.py | 70 |
2 files changed, 86 insertions, 3 deletions
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src index e5b58a8d2..4acd20a75 100644 --- a/numpy/core/src/_simd/_simd.dispatch.c.src +++ b/numpy/core/src/_simd/_simd.dispatch.c.src @@ -15,8 +15,8 @@ /**begin repeat * #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64# * #bsfx = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64# - * #esfx = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64# - * #expand_sup =1, 0, 1, 0, 0, 0, 0, 0, 0, 0# + * #esfx = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64# + * #expand_sup= 1, 0, 1, 0, 0, 0, 0, 0, 0, 0# * #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64# * #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1# * #sat_sup = 1, 1, 1, 1, 0, 0, 0, 0, 0, 0# @@ -27,6 +27,7 @@ * #sum_sup = 0, 0, 0, 0, 1, 0, 1, 0, 1, 1# * #rev64_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 0# * #ncont_sup = 0, 0, 0, 0, 1, 1, 1, 1, 1, 1# + * #intdiv_sup= 1, 1, 1, 1, 1, 1, 1, 1, 0, 0# * #shl_imm = 0, 0, 15, 15, 31, 31, 63, 63, 0, 0# * #shr_imm = 0, 0, 16, 16, 32, 32, 64, 64, 0, 0# */ @@ -354,6 +355,11 @@ SIMD_IMPL_INTRIN_2(mul_@sfx@, v@sfx@, v@sfx@, v@sfx@) SIMD_IMPL_INTRIN_2(div_@sfx@, v@sfx@, v@sfx@, v@sfx@) #endif // div_sup +#if @intdiv_sup@ +SIMD_IMPL_INTRIN_1(divisor_@sfx@, v@sfx@x3, @sfx@) +SIMD_IMPL_INTRIN_2(divc_@sfx@, v@sfx@, v@sfx@, v@sfx@x3) +#endif // intdiv_sup + #if @fused_sup@ /**begin repeat1 * #intrin = muladd, mulsub, nmuladd, nmulsub# @@ -442,6 +448,7 @@ SIMD_IMPL_INTRIN_1(not_@bsfx@, v@bsfx@, v@bsfx@) SIMD_IMPL_INTRIN_1(tobits_@bsfx@, u64, v@bsfx@) /**end repeat**/ + //######################################################################### //## Attach module functions //######################################################################### @@ -449,7 +456,7 @@ static PyMethodDef simd__intrinsics_methods[] = { /**begin repeat * #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64# * #bsfx = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64# - * #esfx = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64# + * #esfx = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64# * #expand_sup =1, 0, 1, 0, 0, 0, 0, 0, 0, 0# * #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64# * #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1# @@ -461,6 +468,7 @@ static PyMethodDef simd__intrinsics_methods[] = { * #sum_sup = 0, 0, 0, 0, 1, 0, 1, 0, 1, 1# * #rev64_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 0# * #ncont_sup = 0, 0, 0, 0, 1, 1, 1, 1, 1, 1# + * #intdiv_sup= 1, 1, 1, 1, 1, 1, 1, 1, 0, 0# * #shl_imm = 0, 0, 15, 15, 31, 31, 63, 63, 0, 0# * #shr_imm = 0, 0, 16, 16, 32, 32, 64, 64, 0, 0# */ @@ -568,6 +576,11 @@ SIMD_INTRIN_DEF(mul_@sfx@) SIMD_INTRIN_DEF(div_@sfx@) #endif // div_sup +#if @intdiv_sup@ +SIMD_INTRIN_DEF(divisor_@sfx@) +SIMD_INTRIN_DEF(divc_@sfx@) +#endif // intdiv_sup + #if @fused_sup@ /**begin repeat1 * #intrin = muladd, mulsub, nmuladd, nmulsub# diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py index 1d1a111be..8306a86d3 100644 --- a/numpy/core/tests/test_simd.py +++ b/numpy/core/tests/test_simd.py @@ -733,6 +733,76 @@ class _SIMD_ALL(_Test_Utility): div = self.div(vdata_a, vdata_b) assert div == data_div + def test_arithmetic_intdiv(self): + """ + Test integer division intrinics: + npyv_divisor_##sfx + npyv_divc_##sfx + """ + if self._is_fp(): + return + + def trunc_div(a, d): + """ + Divide towards zero works with large integers > 2^53, + equivalent to int(a/d) + """ + sign_a, sign_d = a < 0, d < 0 + if a == 0 or sign_a == sign_d: + return a // d + return (a + sign_d - sign_a) // d + 1 + + int_min = self._int_min() if self._is_signed() else 1 + int_max = self._int_max() + rdata = ( + 0, 1, self.nlanes, int_max-self.nlanes, + int_min, int_min//2 + 1 + ) + divisors = (1, 2, self.nlanes, int_min, int_max, int_max//2) + + for x, d in zip(rdata, divisors): + data = self._data(x) + vdata = self.load(data) + data_divc = [trunc_div(a, d) for a in data] + divisor = self.divisor(d) + divc = self.divc(vdata, divisor) + assert divc == data_divc + + if not self._is_signed(): + return + + safe_neg = lambda x: -x-1 if -x > int_max else -x + # test round divison for signed integers + for x, d in zip(rdata, divisors): + d_neg = safe_neg(d) + data = self._data(x) + data_neg = [safe_neg(a) for a in data] + vdata = self.load(data) + vdata_neg = self.load(data_neg) + divisor = self.divisor(d) + divisor_neg = self.divisor(d_neg) + + # round towards zero + data_divc = [trunc_div(a, d_neg) for a in data] + divc = self.divc(vdata, divisor_neg) + assert divc == data_divc + data_divc = [trunc_div(a, d) for a in data_neg] + divc = self.divc(vdata_neg, divisor) + assert divc == data_divc + + # test truncate sign if the dividend is zero + vzero = self.zero() + for d in (-1, -10, -100, int_min//2, int_min): + divisor = self.divisor(d) + divc = self.divc(vzero, divisor) + assert divc == vzero + + # test overflow + vmin = self.setall(int_min) + divisor = self.divisor(-1) + divc = self.divc(vmin, divisor) + assert divc == vmin + def test_arithmetic_reduce_sum(self): """ Test reduce sum intrinics: |