From 81bc4565b50c6cebb21c95c685285e32e1fb9b65 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 4 Oct 2022 13:36:08 +0200 Subject: MAINT: Ensure graceful handling of large header sizes This ensures graceful handling of large header files. Unfortunately, it may be a bit inconvenient for users, thus the new kwarg and the work-around of also accepting allow-pickle. See also the documentation here: https://docs.python.org/3.10/library/ast.html#ast.literal_eval --- numpy/lib/format.py | 59 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 9 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 625768b62..19fec48ed 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -186,6 +186,10 @@ _header_size_info = { (3, 0): (' max_header_size: + raise ValueError( + f"Header info length ({len(header)}) is large and may not be safe " + "to load securely.\n" + "To allow loading, adjust `max_header_size` or fully trust " + "the `.npy` file using `allow_pickle=True`.\n" + "For safety against large resource use or crashes, sandboxing " + "may be necessary.") # The header is a pretty-printed string representation of a literal # Python dictionary with trailing newlines padded to a ARRAY_ALIGN byte @@ -694,7 +716,8 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): fp.write(chunk.tobytes('C')) -def read_array(fp, allow_pickle=False, pickle_kwargs=None): +def read_array(fp, allow_pickle=False, pickle_kwargs=None, *, + max_header_size=_MAX_HEADER_SIZE): """ Read an array from an NPY file. @@ -713,6 +736,12 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): Additional keyword arguments to pass to pickle.load. These are only useful when loading object arrays saved on Python 2 when using Python 3. + max_header_size : int, optional + Maximum allowed size of the header. 
Large headers may not be safe + to load securely and thus require explicitly passing a larger value. + See :py:meth:`ast.literal_eval()` for details. + This option is ignored when `allow_pickle` is passed. In that case + the file is by definition trusted and the limit is unnecessary. Returns ------- @@ -726,9 +755,15 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): an object array. """ + if allow_pickle: + # Effectively ignore max_header_size, since `allow_pickle` indicates + # that the input is fully trusted. + max_header_size = 2**64 + version = read_magic(fp) _check_version(version) - shape, fortran_order, dtype = _read_array_header(fp, version) + shape, fortran_order, dtype = _read_array_header( + fp, version, max_header_size=max_header_size) if len(shape) == 0: count = 1 else: @@ -788,7 +823,8 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): def open_memmap(filename, mode='r+', dtype=None, shape=None, - fortran_order=False, version=None): + fortran_order=False, version=None, *, + max_header_size=_MAX_HEADER_SIZE): """ Open a .npy file as a memory-mapped array. @@ -819,6 +855,10 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, If the mode is a "write" mode, then this is the version of the file format used to create the file. None means use the oldest supported version that is able to store the data. Default: None + max_header_size : int, optional + Maximum allowed size of the header. Large headers may not be safe + to load securely and thus require explicitly passing a larger value. + See :py:meth:`ast.literal_eval()` for details. Returns ------- @@ -866,7 +906,8 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, version = read_magic(fp) _check_version(version) - shape, fortran_order, dtype = _read_array_header(fp, version) + shape, fortran_order, dtype = _read_array_header( + fp, version, max_header_size=max_header_size) if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." 
raise ValueError(msg) -- cgit v1.2.1