From 0f450abec432763b92d6a9b1a778e8c0e5232338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Gust=C3=A4bel?= Date: Tue, 19 Apr 2016 08:43:17 +0200 Subject: Issue #24838: tarfile's ustar and gnu formats now correctly calculate name and link field limits for multibyte character encodings like utf-8. --- Lib/tarfile.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'Lib/tarfile.py') diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 523620e004..86e1cf9b89 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -812,11 +812,11 @@ class TarInfo(object): """ info["magic"] = POSIX_MAGIC - if len(info["linkname"]) > LENGTH_LINK: + if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: raise ValueError("linkname is too long") - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) + if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: + info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors) return self._create_header(info, USTAR_FORMAT, encoding, errors) @@ -826,10 +826,10 @@ class TarInfo(object): info["magic"] = GNU_MAGIC buf = b"" - if len(info["linkname"]) > LENGTH_LINK: + if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) - if len(info["name"]) > LENGTH_NAME: + if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) return buf + self._create_header(info, GNU_FORMAT, encoding, errors) @@ -889,19 +889,20 @@ class TarInfo(object): """ return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") - def _posix_split_name(self, name): + def _posix_split_name(self, name, encoding, errors): """Split a name longer than 100 chars into a prefix and a name part. """ - prefix = name[:LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] - - name = name[len(prefix):] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: + components = name.split("/") + for i in range(1, len(components)): + prefix = "/".join(components[:i]) + name = "/".join(components[i:]) + if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \ + len(name.encode(encoding, errors)) <= LENGTH_NAME: + break + else: raise ValueError("name is too long") + return prefix, name @staticmethod -- cgit v1.2.1