diff options
author | Lars Gustäbel <lars@gustaebel.de> | 2011-10-14 12:53:10 +0200 |
---|---|---|
committer | Lars Gustäbel <lars@gustaebel.de> | 2011-10-14 12:53:10 +0200 |
commit | 01277d166a993742814c772d01987fbaafb528d4 (patch) | |
tree | 1313156ef053cbd757ce804db6387c05b925f234 | |
parent | 0e7e715a117ca5f29d53890550ff00447f0c2dc5 (diff) | |
parent | ac3d137a303d579d7b02af083fda90309ab9378a (diff) | |
download | cpython-git-01277d166a993742814c772d01987fbaafb528d4.tar.gz |
Merge with 3.2: Issue #13158: Fix decoding and encoding of base-256 number fields in tarfile.
The nti() function, which converts a number field from a tar header to a number,
failed to decode GNU tar-specific base-256 fields. I also added support for
decoding and encoding negative base-256 number fields.
-rw-r--r-- | Lib/tarfile.py | 43 | ||||
-rw-r--r-- | Lib/test/test_tarfile.py | 24 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 49 insertions, 21 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 2560562319..39fe635049 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -194,16 +194,18 @@ def nti(s): """ # There are two possible encodings for a number field, see # itn() below. - if s[0] != chr(0o200): + if s[0] in (0o200, 0o377): + n = 0 + for i in range(len(s) - 1): + n <<= 8 + n += s[i + 1] + if s[0] == 0o377: + n = -(256 ** (len(s) - 1) - n) + else: try: n = int(nts(s, "ascii", "strict") or "0", 8) except ValueError: raise InvalidHeaderError("invalid header") - else: - n = 0 - for i in range(len(s) - 1): - n <<= 8 - n += ord(s[i + 1]) return n def itn(n, digits=8, format=DEFAULT_FORMAT): @@ -212,25 +214,26 @@ def itn(n, digits=8, format=DEFAULT_FORMAT): # POSIX 1003.1-1988 requires numbers to be encoded as a string of # octal digits followed by a null-byte, this allows values up to # (8**(digits-1))-1. GNU tar allows storing numbers greater than - # that if necessary. A leading 0o200 byte indicates this particular - # encoding, the following digits-1 bytes are a big-endian - # representation. This allows values up to (256**(digits-1))-1. + # that if necessary. A leading 0o200 or 0o377 byte indicate this + # particular encoding, the following digits-1 bytes are a big-endian + # base-256 representation. This allows values up to (256**(digits-1))-1. + # A 0o200 byte indicates a positive number, a 0o377 byte a negative + # number. if 0 <= n < 8 ** (digits - 1): s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL - else: - if format != GNU_FORMAT or n >= 256 ** (digits - 1): - raise ValueError("overflow in number field") - - if n < 0: - # XXX We mimic GNU tar's behaviour with negative numbers, - # this could raise OverflowError. 
- n = struct.unpack("L", struct.pack("l", n))[0] + elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1): + if n >= 0: + s = bytearray([0o200]) + else: + s = bytearray([0o377]) + n = 256 ** digits + n - s = bytearray() for i in range(digits - 1): - s.insert(0, n & 0o377) + s.insert(1, n & 0o377) n >>= 8 - s.insert(0, 0o200) + else: + raise ValueError("overflow in number field") + return s def calc_chksums(buf): diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 034765c5e7..d346113582 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1582,9 +1582,31 @@ class MiscTest(unittest.TestCase): self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), "foo") self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), "foo") - def test_number_fields(self): + def test_read_number_fields(self): + # Issue 13158: Test if GNU tar specific base-256 number fields + # are decoded correctly. + self.assertEqual(tarfile.nti(b"0000001\x00"), 1) + self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777) + self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"), 0o10000000) + self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), 0xffffffff) + self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), -1) + self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), -100) + self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), -0x100000000000000) + + def test_write_number_fields(self): self.assertEqual(tarfile.itn(1), b"0000001\x00") + self.assertEqual(tarfile.itn(0o7777777), b"7777777\x00") + self.assertEqual(tarfile.itn(0o10000000), b"\x80\x00\x00\x00\x00\x20\x00\x00") self.assertEqual(tarfile.itn(0xffffffff), b"\x80\x00\x00\x00\xff\xff\xff\xff") + self.assertEqual(tarfile.itn(-1), b"\xff\xff\xff\xff\xff\xff\xff\xff") + self.assertEqual(tarfile.itn(-100), b"\xff\xff\xff\xff\xff\xff\xff\x9c") + self.assertEqual(tarfile.itn(-0x100000000000000), 
b"\xff\x00\x00\x00\x00\x00\x00\x00") + + def test_number_field_limits(self): + self.assertRaises(ValueError, tarfile.itn, -1, 8, tarfile.USTAR_FORMAT) + self.assertRaises(ValueError, tarfile.itn, 0o10000000, 8, tarfile.USTAR_FORMAT) + self.assertRaises(ValueError, tarfile.itn, -0x10000000001, 6, tarfile.GNU_FORMAT) + self.assertRaises(ValueError, tarfile.itn, 0x10000000000, 6, tarfile.GNU_FORMAT) class ContextManagerTest(unittest.TestCase): @@ -305,6 +305,9 @@ Core and Builtins Library ------- +- Issue #13158: Fix decoding and encoding of GNU tar specific base-256 number + fields in tarfile. + - Issue #13025: mimetypes is now reading MIME types using the UTF-8 encoding, instead of the locale encoding. |