summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Gustäbel <lars@gustaebel.de>, 2011-10-14 12:53:10 +0200
committer: Lars Gustäbel <lars@gustaebel.de>, 2011-10-14 12:53:10 +0200
commit: 01277d166a993742814c772d01987fbaafb528d4 (patch)
tree: 1313156ef053cbd757ce804db6387c05b925f234
parent: 0e7e715a117ca5f29d53890550ff00447f0c2dc5 (diff)
parent: ac3d137a303d579d7b02af083fda90309ab9378a (diff)
download: cpython-git-01277d166a993742814c772d01987fbaafb528d4.tar.gz
Merge with 3.2: Issue #13158: Fix decoding and encoding of base-256 number fields in tarfile.
The nti() function that converts a number field from a tar header to a number failed to decode GNU tar specific base-256 fields. I also added support for decoding and encoding negative base-256 number fields.
-rw-r--r-- Lib/tarfile.py 43
-rw-r--r-- Lib/test/test_tarfile.py 24
-rw-r--r-- Misc/NEWS 3
3 files changed, 49 insertions, 21 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 2560562319..39fe635049 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -194,16 +194,18 @@ def nti(s):
"""
# There are two possible encodings for a number field, see
# itn() below.
- if s[0] != chr(0o200):
+ if s[0] in (0o200, 0o377):
+ n = 0
+ for i in range(len(s) - 1):
+ n <<= 8
+ n += s[i + 1]
+ if s[0] == 0o377:
+ n = -(256 ** (len(s) - 1) - n)
+ else:
try:
n = int(nts(s, "ascii", "strict") or "0", 8)
except ValueError:
raise InvalidHeaderError("invalid header")
- else:
- n = 0
- for i in range(len(s) - 1):
- n <<= 8
- n += ord(s[i + 1])
return n
def itn(n, digits=8, format=DEFAULT_FORMAT):
@@ -212,25 +214,26 @@ def itn(n, digits=8, format=DEFAULT_FORMAT):
# POSIX 1003.1-1988 requires numbers to be encoded as a string of
# octal digits followed by a null-byte, this allows values up to
# (8**(digits-1))-1. GNU tar allows storing numbers greater than
- # that if necessary. A leading 0o200 byte indicates this particular
- # encoding, the following digits-1 bytes are a big-endian
- # representation. This allows values up to (256**(digits-1))-1.
+ # that if necessary. A leading 0o200 or 0o377 byte indicates this
+ # particular encoding, the following digits-1 bytes are a big-endian
+ # base-256 representation. This allows values up to (256**(digits-1))-1.
+ # A 0o200 byte indicates a positive number, a 0o377 byte a negative
+ # number.
if 0 <= n < 8 ** (digits - 1):
s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
- else:
- if format != GNU_FORMAT or n >= 256 ** (digits - 1):
- raise ValueError("overflow in number field")
-
- if n < 0:
- # XXX We mimic GNU tar's behaviour with negative numbers,
- # this could raise OverflowError.
- n = struct.unpack("L", struct.pack("l", n))[0]
+ elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
+ if n >= 0:
+ s = bytearray([0o200])
+ else:
+ s = bytearray([0o377])
+ n = 256 ** digits + n
- s = bytearray()
for i in range(digits - 1):
- s.insert(0, n & 0o377)
+ s.insert(1, n & 0o377)
n >>= 8
- s.insert(0, 0o200)
+ else:
+ raise ValueError("overflow in number field")
+
return s
def calc_chksums(buf):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 034765c5e7..d346113582 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1582,9 +1582,31 @@ class MiscTest(unittest.TestCase):
self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), "foo")
self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), "foo")
- def test_number_fields(self):
+ def test_read_number_fields(self):
+ # Issue 13158: Test if GNU tar specific base-256 number fields
+ # are decoded correctly.
+ self.assertEqual(tarfile.nti(b"0000001\x00"), 1)
+ self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777)
+ self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"), 0o10000000)
+ self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), 0xffffffff)
+ self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), -1)
+ self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), -100)
+ self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), -0x100000000000000)
+
+ def test_write_number_fields(self):
self.assertEqual(tarfile.itn(1), b"0000001\x00")
+ self.assertEqual(tarfile.itn(0o7777777), b"7777777\x00")
+ self.assertEqual(tarfile.itn(0o10000000), b"\x80\x00\x00\x00\x00\x20\x00\x00")
self.assertEqual(tarfile.itn(0xffffffff), b"\x80\x00\x00\x00\xff\xff\xff\xff")
+ self.assertEqual(tarfile.itn(-1), b"\xff\xff\xff\xff\xff\xff\xff\xff")
+ self.assertEqual(tarfile.itn(-100), b"\xff\xff\xff\xff\xff\xff\xff\x9c")
+ self.assertEqual(tarfile.itn(-0x100000000000000), b"\xff\x00\x00\x00\x00\x00\x00\x00")
+
+ def test_number_field_limits(self):
+ self.assertRaises(ValueError, tarfile.itn, -1, 8, tarfile.USTAR_FORMAT)
+ self.assertRaises(ValueError, tarfile.itn, 0o10000000, 8, tarfile.USTAR_FORMAT)
+ self.assertRaises(ValueError, tarfile.itn, -0x10000000001, 6, tarfile.GNU_FORMAT)
+ self.assertRaises(ValueError, tarfile.itn, 0x10000000000, 6, tarfile.GNU_FORMAT)
class ContextManagerTest(unittest.TestCase):
diff --git a/Misc/NEWS b/Misc/NEWS
index 65f862d33d..b35a6be1fb 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -305,6 +305,9 @@ Core and Builtins
Library
-------
+- Issue #13158: Fix decoding and encoding of GNU tar specific base-256 number
+ fields in tarfile.
+
- Issue #13025: mimetypes is now reading MIME types using the UTF-8 encoding,
instead of the locale encoding.