ENH: Use threshold also inside SubArrayFormat.

Previously, for structured arrays that contain a lot of elements, those are always all shown, independent of the threshold print option. This PR ensures that threshold and edgeitems are respected. Note that multidimensional items are typeset without line breaks, to avoid breaking up the structure. Hence, this does not solve the issue that structured array elements can easily overfill a line on the console.
author: Marten van Kerkwijk <mhvk@astro.utoronto.ca> 2023-02-20 21:50:20 -0500
committer: Marten van Kerkwijk <mhvk@astro.utoronto.ca> 2023-04-07 20:12:31 -0400
commit: 59c73c666b3590895d58bcb91a3732820da3c7bb (patch)
tree: 66dbe4f1b5e426c4aea26e83b9a61d959597015b /numpy
parent: 954aa5856c9812b71162415b6573b397368a1da7 (diff)
download: numpy-59c73c666b3590895d58bcb91a3732820da3c7bb.tar.gz
2 files changed, 63 insertions, 10 deletions
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index dcfb6e6a8..62cd52707 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -1096,7 +1096,7 @@ def format_float_scientific(x, precision=None, unique=True, trim='k',
         identify the value may be printed and rounded unbiased.
 
         -- versionadded:: 1.21.0
-        
+
     Returns
     -------
     rep : string
@@ -1181,7 +1181,7 @@ def format_float_positional(x, precision=None, unique=True,
         Minimum number of digits to print. Only has an effect if `unique=True`
         in which case additional digits past those necessary to uniquely
         identify the value may be printed, rounding the last additional digit.
-        
+
         -- versionadded:: 1.21.0
 
     Returns
@@ -1339,13 +1339,29 @@ class TimedeltaFormat(_TimelikeFormat):
 
 
 class SubArrayFormat:
-    def __init__(self, format_function):
+    def __init__(self, format_function, **options):
         self.format_function = format_function
+        self.threshold = options['threshold']
+        self.edge_items = options['edgeitems']
+
+    def __call__(self, a):
+        self.summary_insert = "..." if a.size > self.threshold else ""
+        return self.format_array(a)
+
+    def format_array(self, a):
+        if np.ndim(a) == 0:
+            return self.format_function(a)
+
+        if self.summary_insert and a.shape[0] > 2*self.edge_items:
+            formatted = (
+                [self.format_array(a_) for a_ in a[:self.edge_items]]
+                + [self.summary_insert]
+                + [self.format_array(a_) for a_ in a[-self.edge_items:]]
+            )
+        else:
+            formatted = [self.format_array(a_) for a_ in a]
 
-    def __call__(self, arr):
-        if arr.ndim <= 1:
-            return "[" + ", ".join(self.format_function(a) for a in arr) + "]"
-        return "[" + ", ".join(self.__call__(a) for a in arr) + "]"
+        return "[" + ", ".join(formatted) + "]"
 
 
 class StructuredVoidFormat:
@@ -1369,7 +1385,7 @@ class StructuredVoidFormat:
         for field_name in data.dtype.names:
             format_function = _get_format_function(data[field_name], **options)
             if data.dtype[field_name].shape != ():
-                format_function = SubArrayFormat(format_function)
+                format_function = SubArrayFormat(format_function, **options)
             format_functions.append(format_function)
         return cls(format_functions)
 
@@ -1428,7 +1444,7 @@ def dtype_is_implied(dtype):
     # not just void types can be structured, and names are not part of the repr
     if dtype.names is not None:
         return False
-    
+
     # should care about endianness *unless size is 1* (e.g., int8, bool)
     if not dtype.isnative:
         return False
diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py
index 372cb8d41..b92c8ae8c 100644
--- a/numpy/core/tests/test_arrayprint.py
+++ b/numpy/core/tests/test_arrayprint.py
@@ -353,6 +353,33 @@ class TestArray2String:
                 '       [ 501,  502,  503, ...,  999, 1000, 1001]])'
         assert_equal(repr(A), reprA)
 
+    def test_summarize_structure(self):
+        A = (np.arange(2002, dtype="<i8").reshape(2, 1001)
+             .view([('i', "<i8", (1001,))]))
+        strA = ("[[([   0,    1,    2, ...,  998,  999, 1000],)]\n"
+                " [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]]")
+        assert_equal(str(A), strA)
+
+        reprA = ("array([[([   0,    1,    2, ...,  998,  999, 1000],)],\n"
+                 "       [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]],\n"
+                 "      dtype=[('i', '<i8', (1001,))])")
+        assert_equal(repr(A), reprA)
+
+        B = np.ones(2002, dtype=">i8").view([('i', ">i8", (2, 1001))])
+        strB = "[([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)]"
+        assert_equal(str(B), strB)
+
+        reprB = (
+            "array([([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)],\n"
+            "      dtype=[('i', '>i8', (2, 1001))])"
+        )
+        assert_equal(repr(B), reprB)
+
+        C = (np.arange(22, dtype="<i8").reshape(2, 11)
+             .view([('i1', "<i8"), ('i10', "<i8", (10,))]))
+        strC = "[[( 0, [ 1, ..., 10])]\n [(11, [12, ..., 21])]]"
+        assert_equal(np.array2string(C, threshold=1, edgeitems=1), strC)
+
     def test_linewidth(self):
         a = np.full(6, 1)
 
@@ -817,7 +844,7 @@ class TestPrintOptions:
     )
     def test_dtype_endianness_repr(self, native):
         '''
-        there was an issue where 
+        there was an issue where
         repr(array([0], dtype='<u2')) and repr(array([0], dtype='>u2'))
         both returned the same thing:
         array([0], dtype=uint16)
@@ -963,6 +990,16 @@ class TestPrintOptions:
                     [[ 0.]]]])""")
         )
 
+    def test_edgeitems_structured(self):
+        np.set_printoptions(edgeitems=1, threshold=1)
+        A = np.arange(5*2*3, dtype="<i8").view([('i', "<i8", (5, 2, 3))])
+        reprA = (
+            "array([([[[ 0, ...,  2], [ 3, ...,  5]], ..., "
+            "[[24, ..., 26], [27, ..., 29]]],)],\n"
+            "      dtype=[('i', '<i8', (5, 2, 3))])"
+        )
+        assert_equal(repr(A), reprA)
+
     def test_bad_args(self):
         assert_raises(ValueError, np.set_printoptions, threshold=float('nan'))
         assert_raises(TypeError, np.set_printoptions, threshold='1')
author	Marten van Kerkwijk <mhvk@astro.utoronto.ca>	2023-02-20 21:50:20 -0500
committer	Marten van Kerkwijk <mhvk@astro.utoronto.ca>	2023-04-07 20:12:31 -0400
commit	59c73c666b3590895d58bcb91a3732820da3c7bb (patch)
tree	66dbe4f1b5e426c4aea26e83b9a61d959597015b /numpy
parent	954aa5856c9812b71162415b6573b397368a1da7 (diff)
download	numpy-59c73c666b3590895d58bcb91a3732820da3c7bb.tar.gz