summaryrefslogtreecommitdiff
path: root/numpy/lib/format.py
diff options
context:
space:
mode:
authorMichael <michael.siebert2k@gmail.com>2023-01-13 21:24:35 +0100
committerGitHub <noreply@github.com>2023-01-13 21:24:35 +0100
commit2303556949b96c4220ed86fa4554f6a87dec3842 (patch)
treed88bc32af66dbfe442074007e51f0d3ad9a719eb /numpy/lib/format.py
parent87ba79b05e5eab101b03e1cc76656287d5d0aed5 (diff)
downloadnumpy-2303556949b96c4220ed86fa4554f6a87dec3842.tar.gz
ENH: Faster numpy.load (try/except _filter_header) (#22916)
This pull request speeds up numpy.load. Since _filter_header is quite a bottleneck, we only run it if we must. Users will get a warning if they have a legacy NumPy file so that they can save it again for faster loading. For the main discussion and benchmarks, see #22898. Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
Diffstat (limited to 'numpy/lib/format.py')
-rw-r--r--numpy/lib/format.py22
1 files changed, 18 insertions, 4 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 54fd0b0bc..8f3fd694d 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -623,13 +623,27 @@ def _read_array_header(fp, version, max_header_size=_MAX_HEADER_SIZE):
# "descr" : dtype.descr
# Versions (2, 0) and (1, 0) could have been created by a Python 2
# implementation before header filtering was implemented.
- if version <= (2, 0):
- header = _filter_header(header)
+ #
+ # For performance reasons, we try without _filter_header first though
try:
d = safe_eval(header)
except SyntaxError as e:
- msg = "Cannot parse header: {!r}"
- raise ValueError(msg.format(header)) from e
+ if version <= (2, 0):
+ header = _filter_header(header)
+ try:
+ d = safe_eval(header)
+ except SyntaxError as e2:
+ msg = "Cannot parse header: {!r}"
+ raise ValueError(msg.format(header)) from e2
+ else:
+ warnings.warn(
+ "Reading `.npy` or `.npz` file required additional "
+ "header parsing as it was created on Python 2. Save the "
+ "file again to speed up loading and avoid this warning.",
+ UserWarning, stacklevel=4)
+ else:
+ msg = "Cannot parse header: {!r}"
+ raise ValueError(msg.format(header)) from e
if not isinstance(d, dict):
msg = "Header is not a dictionary: {!r}"
raise ValueError(msg.format(d))