Issue #6123: Fix opening empty archives and files.

(Note that an empty archive is not the same as an empty file. An empty archive contains no members and is correctly terminated with an EOF block full of zeros. An empty file contains no data at all.) The problem was that although tarfile was able to create empty archives, it failed to open them raising a ReadError. On the other hand, tarfile opened empty files without error in most read modes and presented them as empty archives. (However, some modes still raised errors: "r|gz" raised ReadError, but "r:gz" worked, "r:bz2" even raised EOFError.) In order to get a more fine-grained control over the various internal error conditions I now split up the HeaderError exception into a number of meaningful sub-exceptions. This makes it easier in the TarFile.next() method to react to the different conditions in the correct way. The visible change in its behaviour now is that tarfile will open empty archives correctly and raise ReadError consistently for empty files.
author: Lars Gustäbel <lars@gustaebel.de> 2009-11-22 18:30:53 +0000
committer: Lars Gustäbel <lars@gustaebel.de> 2009-11-22 18:30:53 +0000
commit: dd866d57af2e1b82c155b4613c303099785ed62c (patch)
tree: 13c311d2e6a3f9988ef4dccd43ce6ca6164e2a18 /Lib/test/test_tarfile.py
parent: 814d02dcd1b30a98f9a34c710be35de6db82aa40 (diff)
download: cpython-git-dd866d57af2e1b82c155b4613c303099785ed62c.tar.gz
1 files changed, 80 insertions, 5 deletions
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 74a8dac713..20339dd0a7 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -136,7 +136,53 @@ class UstarReadTest(ReadTest):
         fobj.close()
 
 
-class MiscReadTest(ReadTest):
+class CommonReadTest(ReadTest):
+
+    def test_empty_tarfile(self):
+        # Test for issue6123: Allow opening empty archives.
+        # This test checks if tarfile.open() is able to open an empty tar
+        # archive successfully. Note that an empty tar archive is not the
+        # same as an empty file!
+        tarfile.open(tmpname, self.mode.replace("r", "w")).close()
+        try:
+            tar = tarfile.open(tmpname, self.mode)
+            tar.getnames()
+        except tarfile.ReadError:
+            self.fail("tarfile.open() failed on empty archive")
+        self.assertListEqual(tar.getmembers(), [])
+
+    def test_null_tarfile(self):
+        # Test for issue6123: Allow opening empty archives.
+        # This test guarantees that tarfile.open() does not treat an empty
+        # file as an empty tar archive.
+        open(tmpname, "wb").close()
+        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, self.mode)
+        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname)
+
+    def test_ignore_zeros(self):
+        # Test TarFile's ignore_zeros option.
+        if self.mode.endswith(":gz"):
+            _open = gzip.GzipFile
+        elif self.mode.endswith(":bz2"):
+            _open = bz2.BZ2File
+        else:
+            _open = open
+
+        for char in ('\0', 'a'):
+            # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
+            # are ignored correctly.
+            fobj = _open(tmpname, "wb")
+            fobj.write(char * 1024)
+            fobj.write(tarfile.TarInfo("foo").tobuf())
+            fobj.close()
+
+            tar = tarfile.open(tmpname, mode="r", ignore_zeros=True)
+            self.assertListEqual(tar.getnames(), ["foo"],
+                    "ignore_zeros=True should have skipped the %r-blocks" % char)
+            tar.close()
+
+
+class MiscReadTest(CommonReadTest):
 
     def test_no_name_argument(self):
         fobj = open(self.tarname, "rb")
@@ -265,7 +311,7 @@ class MiscReadTest(ReadTest):
         tar.close()
 
 
-class StreamReadTest(ReadTest):
+class StreamReadTest(CommonReadTest):
 
     mode="r|"
 
@@ -1111,12 +1157,12 @@ class AppendTest(unittest.TestCase):
         self._test()
 
     def test_empty(self):
-        open(self.tarname, "w").close()
+        tarfile.open(self.tarname, "w:").close()
         self._add_testfile()
         self._test()
 
     def test_empty_fileobj(self):
-        fobj = StringIO.StringIO()
+        fobj = StringIO.StringIO("\0" * 1024)
         self._add_testfile(fobj)
         fobj.seek(0)
         self._test(fileobj=fobj)
@@ -1146,6 +1192,29 @@ class AppendTest(unittest.TestCase):
         self._create_testtar("w:bz2")
         self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
 
+    # Append mode is supposed to fail if the tarfile to append to
+    # does not end with a zero block.
+    def _test_error(self, data):
+        open(self.tarname, "wb").write(data)
+        self.assertRaises(tarfile.ReadError, self._add_testfile)
+
+    def test_null(self):
+        self._test_error("")
+
+    def test_incomplete(self):
+        self._test_error("\0" * 13)
+
+    def test_premature_eof(self):
+        data = tarfile.TarInfo("foo").tobuf()
+        self._test_error(data)
+
+    def test_trailing_garbage(self):
+        data = tarfile.TarInfo("foo").tobuf()
+        self._test_error(data + "\0" * 13)
+
+    def test_invalid(self):
+        self._test_error("a" * 512)
+
 
 class LimitsTest(unittest.TestCase):
 
@@ -1247,10 +1316,16 @@ class Bz2PartialReadTest(unittest.TestCase):
                     raise AssertionError("infinite loop detected in tarfile.open()")
                 self.hit_eof = self.pos == self.len
                 return StringIO.StringIO.read(self, n)
+            def seek(self, *args):
+                self.hit_eof = False
+                return StringIO.StringIO.seek(self, *args)
 
         data = bz2.compress(tarfile.TarInfo("foo").tobuf())
         for x in range(len(data) + 1):
-            tarfile.open(fileobj=MyStringIO(data[:x]), mode=mode)
+            try:
+                tarfile.open(fileobj=MyStringIO(data[:x]), mode=mode)
+            except tarfile.ReadError:
+                pass # we have no interest in ReadErrors
 
     def test_partial_input(self):
         self._test_partial_input("r")
author	Lars Gustäbel <lars@gustaebel.de>	2009-11-22 18:30:53 +0000
committer	Lars Gustäbel <lars@gustaebel.de>	2009-11-22 18:30:53 +0000
commit	dd866d57af2e1b82c155b4613c303099785ed62c (patch)
tree	13c311d2e6a3f9988ef4dccd43ce6ca6164e2a18 /Lib/test/test_tarfile.py
parent	814d02dcd1b30a98f9a34c710be35de6db82aa40 (diff)
download	cpython-git-dd866d57af2e1b82c155b4613c303099785ed62c.tar.gz