summary | refs | log | tree | commit | diff
path: root/Lib/tarfile.py
diff options
context:
space:
mode:
author: Lars Gustäbel <lars@gustaebel.de>, 2016-04-19 08:43:17 +0200
committer: Lars Gustäbel <lars@gustaebel.de>, 2016-04-19 08:43:17 +0200
commit: 0f450abec432763b92d6a9b1a778e8c0e5232338 (patch)
tree: 992da577543eff31b83c6558ea96bf634db05b40 /Lib/tarfile.py
parent: 472233ec835bfaaf1419c74956a0e64797a6a0c2 (diff)
download: cpython-git-0f450abec432763b92d6a9b1a778e8c0e5232338.tar.gz
Issue #24838: tarfile's ustar and gnu formats now correctly calculate name and
link field limits for multibyte character encodings like utf-8.
Diffstat (limited to 'Lib/tarfile.py')
-rwxr-xr-x Lib/tarfile.py 29
1 files changed, 15 insertions, 14 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 523620e004..86e1cf9b89 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -812,11 +812,11 @@ class TarInfo(object):
"""
info["magic"] = POSIX_MAGIC
- if len(info["linkname"]) > LENGTH_LINK:
+ if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
raise ValueError("linkname is too long")
- if len(info["name"]) > LENGTH_NAME:
- info["prefix"], info["name"] = self._posix_split_name(info["name"])
+ if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
+ info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
return self._create_header(info, USTAR_FORMAT, encoding, errors)
@@ -826,10 +826,10 @@ class TarInfo(object):
info["magic"] = GNU_MAGIC
buf = b""
- if len(info["linkname"]) > LENGTH_LINK:
+ if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
- if len(info["name"]) > LENGTH_NAME:
+ if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
@@ -889,19 +889,20 @@ class TarInfo(object):
"""
return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
- def _posix_split_name(self, name):
+ def _posix_split_name(self, name, encoding, errors):
"""Split a name longer than 100 chars into a prefix
and a name part.
"""
- prefix = name[:LENGTH_PREFIX + 1]
- while prefix and prefix[-1] != "/":
- prefix = prefix[:-1]
-
- name = name[len(prefix):]
- prefix = prefix[:-1]
-
- if not prefix or len(name) > LENGTH_NAME:
+ components = name.split("/")
+ for i in range(1, len(components)):
+ prefix = "/".join(components[:i])
+ name = "/".join(components[i:])
+ if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
+ len(name.encode(encoding, errors)) <= LENGTH_NAME:
+ break
+ else:
raise ValueError("name is too long")
+
return prefix, name
@staticmethod