diff options
author | Michael <michael.siebert2k@gmail.com> | 2023-01-13 21:24:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-13 21:24:35 +0100 |
commit | 2303556949b96c4220ed86fa4554f6a87dec3842 (patch) | |
tree | d88bc32af66dbfe442074007e51f0d3ad9a719eb /numpy/lib/format.py | |
parent | 87ba79b05e5eab101b03e1cc76656287d5d0aed5 (diff) | |
download | numpy-2303556949b96c4220ed86fa4554f6a87dec3842.tar.gz |
ENH: Faster numpy.load (try/except _filter_header) (#22916)
This pull request speeds up numpy.load. Since _filter_header is quite a bottleneck, we now run it only when we must, i.e. when the header cannot be parsed without it. Users will get a warning if they load a legacy NumPy file, so that they can save it again for faster loading.
For the main discussion and benchmarks, see #22898.
Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
Diffstat (limited to 'numpy/lib/format.py')
-rw-r--r-- | numpy/lib/format.py | 22 |
1 files changed, 18 insertions, 4 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 54fd0b0bc..8f3fd694d 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -623,13 +623,27 @@ def _read_array_header(fp, version, max_header_size=_MAX_HEADER_SIZE): # "descr" : dtype.descr # Versions (2, 0) and (1, 0) could have been created by a Python 2 # implementation before header filtering was implemented. - if version <= (2, 0): - header = _filter_header(header) + # + # For performance reasons, we try without _filter_header first though try: d = safe_eval(header) except SyntaxError as e: - msg = "Cannot parse header: {!r}" - raise ValueError(msg.format(header)) from e + if version <= (2, 0): + header = _filter_header(header) + try: + d = safe_eval(header) + except SyntaxError as e2: + msg = "Cannot parse header: {!r}" + raise ValueError(msg.format(header)) from e2 + else: + warnings.warn( + "Reading `.npy` or `.npz` file required additional " + "header parsing as it was created on Python 2. Save the " + "file again to speed up loading and avoid this warning.", + UserWarning, stacklevel=4) + else: + msg = "Cannot parse header: {!r}" + raise ValueError(msg.format(header)) from e if not isinstance(d, dict): msg = "Header is not a dictionary: {!r}" raise ValueError(msg.format(d)) |