diff options
author | Lars Gustäbel <lars@gustaebel.de> | 2016-04-19 08:43:17 +0200 |
---|---|---|
committer | Lars Gustäbel <lars@gustaebel.de> | 2016-04-19 08:43:17 +0200 |
commit | 0f450abec432763b92d6a9b1a778e8c0e5232338 (patch) | |
tree | 992da577543eff31b83c6558ea96bf634db05b40 /Lib/tarfile.py | |
parent | 472233ec835bfaaf1419c74956a0e64797a6a0c2 (diff) | |
download | cpython-git-0f450abec432763b92d6a9b1a778e8c0e5232338.tar.gz |
Issue #24838: tarfile's ustar and gnu formats now correctly calculate name and
link field limits for multibyte character encodings like utf-8.
Diffstat (limited to 'Lib/tarfile.py')
-rwxr-xr-x | Lib/tarfile.py | 29 |
1 file changed, 15 insertions, 14 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 523620e004..86e1cf9b89 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -812,11 +812,11 @@ class TarInfo(object): """ info["magic"] = POSIX_MAGIC - if len(info["linkname"]) > LENGTH_LINK: + if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: raise ValueError("linkname is too long") - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) + if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: + info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors) return self._create_header(info, USTAR_FORMAT, encoding, errors) @@ -826,10 +826,10 @@ class TarInfo(object): info["magic"] = GNU_MAGIC buf = b"" - if len(info["linkname"]) > LENGTH_LINK: + if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) - if len(info["name"]) > LENGTH_NAME: + if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) return buf + self._create_header(info, GNU_FORMAT, encoding, errors) @@ -889,19 +889,20 @@ class TarInfo(object): """ return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") - def _posix_split_name(self, name): + def _posix_split_name(self, name, encoding, errors): """Split a name longer than 100 chars into a prefix and a name part. 
""" - prefix = name[:LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] - - name = name[len(prefix):] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: + components = name.split("/") + for i in range(1, len(components)): + prefix = "/".join(components[:i]) + name = "/".join(components[i:]) + if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \ + len(name.encode(encoding, errors)) <= LENGTH_NAME: + break + else: raise ValueError("name is too long") + return prefix, name @staticmethod |