author Sayed Adel <seiko@imavr.com> 2020-07-08 09:27:53 +0200
committer Sayed Adel <seiko@imavr.com> 2020-10-27 11:46:58 +0000
commit c65a559bcd459bacca3e8cf15e129cf2576540b5 (patch)
tree b7fbab96874321ebf7caf6c6fbdbca3a13015b48
parent cb3efe8e03b53dbab457a99be1a48384312abe16 (diff)
download numpy-c65a559bcd459bacca3e8cf15e129cf2576540b5.tar.gz
ENH, TST: Add a testing unit that covers the currently implemented intrinsics.
-rw-r--r--  numpy/core/tests/test_simd.py  408
1 file changed, 408 insertions, 0 deletions
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
new file mode 100644
index 000000000..26e78d402
--- /dev/null
+++ b/numpy/core/tests/test_simd.py
@@ -0,0 +1,408 @@
+# NOTE: Please avoid using numpy.testing since NPYV intrinsics
+# may be involved in its functionality.
+import pytest
+from numpy.core._simd import targets
+
+class _Test_Utility(object):
+ # submodule of the desired SIMD extension, e.g. targets["AVX512F"]
+ npyv = None
+ # the current data type suffix e.g. 's8'
+ sfx = None
+
+ def __getattr__(self, attr):
+ """
+ Call NPYV intrinsics without the 'npyv_' prefix, auto-suffixed
+ according to the class attribute 'sfx'.
+ """
+ nattr = getattr(self.npyv, attr + "_" + self.sfx)
+ if callable(nattr):
+ return lambda *args: nattr(*args)
+ return nattr
+
+ def _data(self, n=None, reverse=False):
+ """
+ Create a list of consecutive numbers according to the number of vector lanes.
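+ For example, with nlanes == 4 and n == 1 the result is [1, 2, 3, 4]
+ (or [1.0, 2.0, 3.0, 4.0] for floating-point suffixes).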
+ """
+ if n is None:
+ n = 1
+ rng = range(n, n + self.nlanes)
+ if reverse:
+ rng = reversed(rng)
+ if self._is_fp():
+ return [x / 1.0 for x in rng]
+ return list(rng)
+
+ def _is_unsigned(self):
+ return self.sfx[0] == 'u'
+
+ def _is_signed(self):
+ return self.sfx[0] == 's'
+
+ def _is_fp(self):
+ return self.sfx[0] == 'f'
+
+ def _scalar_size(self):
+ return int(self.sfx[1:])
+
+ def _int_clip(self, seq):
+ if self._is_fp():
+ return seq
+ max_int = self._int_max()
+ min_int = self._int_min()
+ return [min(max(v, min_int), max_int) for v in seq]
+
+ def _int_max(self):
+ if self._is_fp():
+ return None
+ max_u = self._to_unsigned(self.setall(-1))[0]
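+ # setall(-1) yields an all-ones bit pattern, so its unsigned view is the
+ # unsigned maximum (e.g. 255 for 8-bit lanes); halving gives the signed maximum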
+ if self._is_signed():
+ return max_u // 2
+ return max_u
+
+ def _int_min(self):
+ if self._is_fp():
+ return None
+ if self._is_unsigned():
+ return 0
+ return -(self._int_max() + 1)
+
+ def _true_mask(self):
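+ # the all-ones lane value; comparison intrinsics return it for true lanes
+ # (see to_bool() in test_operators_comparison)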
+ max_unsig = getattr(self.npyv, "setall_u" + self.sfx[1:])(-1)
+ return max_unsig[0]
+
+ def _to_unsigned(self, vector):
+ if isinstance(vector, (list, tuple)):
+ return getattr(self.npyv, "load_u" + self.sfx[1:])(vector)
+ else:
+ sfx = vector.__name__.replace("npyv_", "")
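+ # e.g. "npyv_b8" -> "b8" selects cvt_u8_b8, while "npyv_s8" selects
+ # reinterpret_u8_s8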
+ if sfx[0] == "b":
+ cvt_intrin = "cvt_u{0}_b{0}"
+ else:
+ cvt_intrin = "reinterpret_u{0}_{1}"
+ return getattr(self.npyv, cvt_intrin.format(sfx[1:], sfx))(vector)
+
+class _SIMD_INT(_Test_Utility):
+ """
+ To test all integer vector types at once
+ """
+ def test_operators_shift(self):
+ if self.sfx in ("u8", "s8"):
+ pytest.skip("there are no shift intrinsics for npyv_" + self.sfx)
+
+ data_a = self._data(self._int_max() - self.nlanes)
+ data_b = self._data(self._int_min(), reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
+ for count in range(self._scalar_size()):
+ # load to cast
+ data_shl_a = self.load([a << count for a in data_a])
+ # left shift
+ shl = self.shl(vdata_a, count)
+ assert shl == data_shl_a
+ # left shift by an immediate constant
+ shli = self.shli(vdata_a, count)
+ assert shli == data_shl_a
+ # load to cast
+ data_shr_a = self.load([a >> count for a in data_a])
+ # right shift
+ shr = self.shr(vdata_a, count)
+ assert shr == data_shr_a
+ # right shift by an immediate constant
+ shri = self.shri(vdata_a, count)
+ assert shri == data_shr_a
+
+ def test_arithmetic_subadd_saturated(self):
+ if self.sfx in ("u32", "s32", "u64", "s64"):
+ pytest.skip("there are no saturated add/sub intrinsics for npyv_" + self.sfx)
+
+ data_a = self._data(self._int_max() - self.nlanes)
+ data_b = self._data(self._int_min(), reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
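+ # saturated add/sub clamp to the lane's min/max instead of wrapping,
+ # e.g. for s8: 127 + 1 stays 127 and -128 - 1 stays -128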
+ data_adds = self._int_clip([a + b for a, b in zip(data_a, data_b)])
+ adds = self.adds(vdata_a, vdata_b)
+ assert adds == data_adds
+
+ data_subs = self._int_clip([a - b for a, b in zip(data_a, data_b)])
+ subs = self.subs(vdata_a, vdata_b)
+ assert subs == data_subs
+
+class _SIMD_FP(_Test_Utility):
+ """
+ To test all float vector types at once
+ """
+ pass
+
+class _SIMD_ALL(_Test_Utility):
+ """
+ To test all vector types at once
+ """
+ def test_memory_load(self):
+ data = self._data()
+ # unaligned load
+ load_data = self.load(data)
+ assert load_data == data
+ # aligned load
+ loada_data = self.loada(data)
+ assert loada_data == data
+ # stream load
+ loads_data = self.loads(data)
+ assert loads_data == data
+ # load lower part
+ loadl = self.loadl(data)
+ loadl_half = list(loadl)[:self.nlanes//2]
+ data_half = data[:self.nlanes//2]
+ assert loadl_half == data_half
+ assert loadl != data # detect overflow
+
+ def test_memory_store(self):
+ data = self._data()
+ vdata = self.load(data)
+ # unaligned store
+ store = [0] * self.nlanes
+ self.store(store, vdata)
+ assert store == data
+ # aligned store
+ store_a = [0] * self.nlanes
+ self.storea(store_a, vdata)
+ assert store_a == data
+ # stream store
+ store_s = [0] * self.nlanes
+ self.stores(store_s, vdata)
+ assert store_s == data
+ # store lower part
+ store_l = [0] * self.nlanes
+ self.storel(store_l, vdata)
+ assert store_l[:self.nlanes//2] == data[:self.nlanes//2]
+ assert store_l != vdata # detect overflow
+ # store higher part
+ store_h = [0] * self.nlanes
+ self.storeh(store_h, vdata)
+ assert store_h[:self.nlanes//2] == data[self.nlanes//2:]
+ assert store_h != vdata # detect overflow
+
+ def test_misc(self):
+ broadcast_zero = self.zero()
+ assert broadcast_zero == [0] * self.nlanes
+ for i in range(1, 10):
+ broadcasti = self.setall(i)
+ assert broadcasti == [i] * self.nlanes
+
+ data_a, data_b = self._data(), self._data(reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
+ # the Python level of npyv_set_* doesn't support ignoring extra specified
+ # lanes or filling non-specified lanes with zero, so all lanes are passed
+ vset = self.set(*data_a)
+ assert vset == data_a
+ # the Python level of npyv_setf_* doesn't support ignoring extra specified
+ # lanes or filling non-specified lanes with the leading fill scalar
+ vsetf = self.setf(10, *data_a)
+ assert vsetf == data_a
+
+ # We're testing the sanity of _simd's type-vector;
+ # the reinterpret* intrinsics themselves are tested by the compiler
+ # during the build of the _simd module
+ sfxes = ["u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64", "f32"]
+ if self.npyv.simd_f64:
+ sfxes.append("f64")
+ for sfx in sfxes:
+ vec_name = getattr(self, "reinterpret_" + sfx)(vdata_a).__name__
+ assert vec_name == "npyv_" + sfx
+
+ # select & mask operations
+ select_a = self.select(self.cmpeq(self.zero(), self.zero()), vdata_a, vdata_b)
+ assert select_a == data_a
+ select_b = self.select(self.cmpneq(self.zero(), self.zero()), vdata_a, vdata_b)
+ assert select_b == data_b
+
+ # cleanup intrinsic is only used with AVX for
+ # zeroing registers to avoid the AVX-SSE transition penalty,
+ # so nothing to test here
+ self.npyv.cleanup()
+
+ def test_reorder(self):
+ data_a, data_b = self._data(), self._data(reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+ # lower half part
+ data_a_lo = data_a[:self.nlanes//2]
+ data_b_lo = data_b[:self.nlanes//2]
+ # higher half part
+ data_a_hi = data_a[self.nlanes//2:]
+ data_b_hi = data_b[self.nlanes//2:]
+ # combine two lower parts
+ combinel = self.combinel(vdata_a, vdata_b)
+ assert combinel == data_a_lo + data_b_lo
+ # combine two higher parts
+ combineh = self.combineh(vdata_a, vdata_b)
+ assert combineh == data_a_hi + data_b_hi
+ # combine x2
+ combine = self.combine(vdata_a, vdata_b)
+ assert combine == (data_a_lo + data_b_lo, data_a_hi + data_b_hi)
+ # zip(interleave)
+ data_zipl = [v for p in zip(data_a_lo, data_b_lo) for v in p]
+ data_ziph = [v for p in zip(data_a_hi, data_b_hi) for v in p]
+ vzip = self.zip(vdata_a, vdata_b)
+ assert vzip == (data_zipl, data_ziph)
+
+ def test_operators_comparison(self):
+ if self._is_fp():
+ data_a = self._data()
+ else:
+ data_a = self._data(self._int_max() - self.nlanes)
+ data_b = self._data(self._int_min(), reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
+ mask_true = self._true_mask()
+ def to_bool(vector):
+ return [lane == mask_true for lane in vector]
+ # equal
+ data_eq = [a == b for a, b in zip(data_a, data_b)]
+ cmpeq = to_bool(self.cmpeq(vdata_a, vdata_b))
+ assert cmpeq == data_eq
+ # not equal
+ data_neq = [a != b for a, b in zip(data_a, data_b)]
+ cmpneq = to_bool(self.cmpneq(vdata_a, vdata_b))
+ assert cmpneq == data_neq
+ # greater than
+ data_gt = [a > b for a, b in zip(data_a, data_b)]
+ cmpgt = to_bool(self.cmpgt(vdata_a, vdata_b))
+ assert cmpgt == data_gt
+ # greater than or equal
+ data_ge = [a >= b for a, b in zip(data_a, data_b)]
+ cmpge = to_bool(self.cmpge(vdata_a, vdata_b))
+ assert cmpge == data_ge
+ # less than
+ data_lt = [a < b for a, b in zip(data_a, data_b)]
+ cmplt = to_bool(self.cmplt(vdata_a, vdata_b))
+ assert cmplt == data_lt
+ # less than or equal
+ data_le = [a <= b for a, b in zip(data_a, data_b)]
+ cmple = to_bool(self.cmple(vdata_a, vdata_b))
+ assert cmple == data_le
+
+ def test_operators_logical(self):
+ if self._is_fp():
+ data_a = self._data()
+ else:
+ data_a = self._data(self._int_max() - self.nlanes)
+ data_b = self._data(self._int_min(), reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
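+ # Python's bitwise operators aren't defined for floats, so for fp suffixes
+ # both the expected and actual results are compared through their unsigned view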
+ if self._is_fp():
+ data_cast_a = self._to_unsigned(vdata_a)
+ data_cast_b = self._to_unsigned(vdata_b)
+ cast, cast_data = self._to_unsigned, self._to_unsigned
+ else:
+ data_cast_a, data_cast_b = data_a, data_b
+ cast, cast_data = lambda a: a, self.load
+
+ data_xor = cast_data([a ^ b for a, b in zip(data_cast_a, data_cast_b)])
+ vxor = cast(self.xor(vdata_a, vdata_b))
+ assert vxor == data_xor
+
+ data_or = cast_data([a | b for a, b in zip(data_cast_a, data_cast_b)])
+ vor = cast(getattr(self, "or")(vdata_a, vdata_b))
+ assert vor == data_or
+
+ data_and = cast_data([a & b for a, b in zip(data_cast_a, data_cast_b)])
+ vand = cast(getattr(self, "and")(vdata_a, vdata_b))
+ assert vand == data_and
+
+ data_not = cast_data([~a for a in data_cast_a])
+ vnot = cast(getattr(self, "not")(vdata_a))
+ assert vnot == data_not
+
+ def test_conversion_boolean(self):
+ bsfx = "b" + self.sfx[1:]
+ to_boolean = getattr(self.npyv, "cvt_%s_%s" % (bsfx, self.sfx))
+ from_boolean = getattr(self.npyv, "cvt_%s_%s" % (self.sfx, bsfx))
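+ # e.g. for sfx 'u8' these resolve to npyv.cvt_b8_u8 and npyv.cvt_u8_b8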
+
+ false_vb = to_boolean(self.setall(0))
+ true_vb = self.cmpeq(self.setall(0), self.setall(0))
+ assert false_vb != true_vb
+
+ false_vsfx = from_boolean(false_vb)
+ true_vsfx = from_boolean(true_vb)
+ assert false_vsfx != true_vsfx
+
+ def test_arithmetic_subadd(self):
+ if self._is_fp():
+ data_a = self._data()
+ else:
+ data_a = self._data(self._int_max() - self.nlanes)
+ data_b = self._data(self._int_min(), reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
+ # non-saturated
+ data_add = self.load([a + b for a, b in zip(data_a, data_b)]) # load to cast
+ add = self.add(vdata_a, vdata_b)
+ assert add == data_add
+ data_sub = self.load([a - b for a, b in zip(data_a, data_b)])
+ sub = self.sub(vdata_a, vdata_b)
+ assert sub == data_sub
+
+ def test_arithmetic_mul(self):
+ if self.sfx in ("u64", "s64"):
+ pytest.skip("there is no multiplication intrinsic for npyv_" + self.sfx)
+
+ if self._is_fp():
+ data_a = self._data()
+ else:
+ data_a = self._data(self._int_max() - self.nlanes)
+ data_b = self._data(self._int_min(), reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
+ data_mul = self.load([a * b for a, b in zip(data_a, data_b)])
+ mul = self.mul(vdata_a, vdata_b)
+ assert mul == data_mul
+
+ def test_arithmetic_div(self):
+ if not self._is_fp():
+ pytest.skip("there is no division intrinsic for npyv_" + self.sfx)
+
+ data_a, data_b = self._data(), self._data(reverse=True)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+
+ # load to truncate the f64 Python floats to f32 precision when sfx is f32
+ data_div = self.load([a / b for a, b in zip(data_a, data_b)])
+ div = self.div(vdata_a, vdata_b)
+ assert div == data_div
+
+int_sfx = ("u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64")
+fp_sfx = ("f32", "f64")
+all_sfx = int_sfx + fp_sfx
+tests_registry = {
+ int_sfx : "_SIMD_INT",
+ fp_sfx : "_SIMD_FP",
+ all_sfx : "_SIMD_ALL"
+}
+for name, npyv in targets.items():
+ simd_width = npyv.simd if npyv else ''
+ pretty_name = name.split('__') # multi-target separator
+ if len(pretty_name) > 1:
+ # multi-target
+ pretty_name = f"({' '.join(pretty_name)})"
+ else:
+ pretty_name = pretty_name[0]
+
+ skip = ""
+ skip_sfx = dict()
+ if not npyv:
+ skip = f"target '{pretty_name}' isn't supported by current machine"
+ elif not npyv.simd:
+ skip = f"target '{pretty_name}' isn't supported by NPYV"
+ elif not npyv.simd_f64:
+ skip_sfx["f64"] = f"target '{pretty_name}' doesn't support double-precision"
+
+ for sfxes, class_name in tests_registry.items():
+ for sfx in sfxes:
+ skip_m = skip_sfx.get(sfx, skip)
+ if skip_m:
+ skip_m = '@pytest.mark.skip(reason="%s")' % skip_m
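+ # dynamically generate a test class per (suite, width, target, suffix);
+ # e.g. a hypothetical "AVX2" target whose npyv.simd width is 256 would
+ # produce a class named Test_SIMD_ALL_256_AVX2_f32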
+ exec(
+ f"{skip_m}\n"
+ f"class Test{class_name}_{simd_width}_{name}_{sfx}({class_name}):\n"
+ f" npyv = targets['{name}']\n"
+ f" sfx = '{sfx}'\n"
+ )