ENH: Faster numpy.load (try/except _filter_header) (#22916)

This pull request speeds up numpy.load. Since _filter_header is quite a bottleneck, we only run it if we must. Users will get a warning if they have a legacy NumPy file so that they can save it again for faster loading. For the main discussion and benchmarks, see #22898. Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
author: Michael <michael.siebert2k@gmail.com> 2023-01-13 21:24:35 +0100
committer: GitHub <noreply@github.com> 2023-01-13 21:24:35 +0100
commit: 2303556949b96c4220ed86fa4554f6a87dec3842 (patch)
tree: d88bc32af66dbfe442074007e51f0d3ad9a719eb /numpy/lib/format.py
parent: 87ba79b05e5eab101b03e1cc76656287d5d0aed5 (diff)
download: numpy-2303556949b96c4220ed86fa4554f6a87dec3842.tar.gz
1 files changed, 18 insertions, 4 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 54fd0b0bc..8f3fd694d 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -623,13 +623,27 @@ def _read_array_header(fp, version, max_header_size=_MAX_HEADER_SIZE):
     #   "descr" : dtype.descr
     # Versions (2, 0) and (1, 0) could have been created by a Python 2
     # implementation before header filtering was implemented.
-    if version <= (2, 0):
-        header = _filter_header(header)
+    #
+    # For performance reasons, we try without _filter_header first though
     try:
         d = safe_eval(header)
     except SyntaxError as e:
-        msg = "Cannot parse header: {!r}"
-        raise ValueError(msg.format(header)) from e
+        if version <= (2, 0):
+            header = _filter_header(header)
+            try:
+                d = safe_eval(header)
+            except SyntaxError as e2:
+                msg = "Cannot parse header: {!r}"
+                raise ValueError(msg.format(header)) from e2
+            else:
+                warnings.warn(
+                    "Reading `.npy` or `.npz` file required additional "
+                    "header parsing as it was created on Python 2. Save the "
+                    "file again to speed up loading and avoid this warning.",
+                    UserWarning, stacklevel=4)
+        else:
+            msg = "Cannot parse header: {!r}"
+            raise ValueError(msg.format(header)) from e
     if not isinstance(d, dict):
         msg = "Header is not a dictionary: {!r}"
         raise ValueError(msg.format(d))
author	Michael <michael.siebert2k@gmail.com>	2023-01-13 21:24:35 +0100
committer	GitHub <noreply@github.com>	2023-01-13 21:24:35 +0100
commit	2303556949b96c4220ed86fa4554f6a87dec3842 (patch)
tree	d88bc32af66dbfe442074007e51f0d3ad9a719eb /numpy/lib/format.py
parent	87ba79b05e5eab101b03e1cc76656287d5d0aed5 (diff)
download	numpy-2303556949b96c4220ed86fa4554f6a87dec3842.tar.gz