diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-22 17:07:49 +0200 |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-22 17:07:49 +0200 |
commit | 57f9b7a12420d461e8ea5cc1ba63f80de778c7d5 (patch) | |
tree | c68dc61b4ff59cb81b20427786c4785c36472613 /Lib/gzip.py | |
parent | a80f761a6d23f42abc529a338477741ba4973168 (diff) | |
parent | 7c3922f44c226eac29a497648bbc3cc8702905a8 (diff) | |
download | cpython-git-57f9b7a12420d461e8ea5cc1ba63f80de778c7d5.tar.gz |
Issue #1159051: GzipFile now raises EOFError when reading a corrupted file
with truncated header or footer.
Added tests for reading truncated gzip, bzip2, and lzma files.
Diffstat (limited to 'Lib/gzip.py')
-rw-r--r-- | Lib/gzip.py | 81 |
1 files changed, 37 insertions, 44 deletions
```diff
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 998a8e5d05..d7da02ca1b 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -65,9 +65,6 @@ def write32u(output, value):
     # or unsigned.
     output.write(struct.pack("<L", value))
 
-def read32(input):
-    return struct.unpack("<I", input.read(4))[0]
-
 class _PaddedFile:
     """Minimal read-only file object that prepends a string to the contents
     of an actual file. Shouldn't be used outside of gzip.py, as it lacks
@@ -281,27 +278,31 @@ class GzipFile(io.BufferedIOBase):
         self.crc = zlib.crc32(b"") & 0xffffffff
         self.size = 0
 
+    def _read_exact(self, n):
+        data = self.fileobj.read(n)
+        while len(data) < n:
+            b = self.fileobj.read(n - len(data))
+            if not b:
+                raise EOFError("Compressed file ended before the "
+                               "end-of-stream marker was reached")
+            data += b
+        return data
+
     def _read_gzip_header(self):
         magic = self.fileobj.read(2)
         if magic == b'':
-            raise EOFError("Reached EOF")
+            return False
 
         if magic != b'\037\213':
             raise IOError('Not a gzipped file')
-        method = ord( self.fileobj.read(1) )
+
+        method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8))
         if method != 8:
             raise IOError('Unknown compression method')
-        flag = ord( self.fileobj.read(1) )
-        self.mtime = read32(self.fileobj)
-        # extraflag = self.fileobj.read(1)
-        # os = self.fileobj.read(1)
-        self.fileobj.read(2)
 
         if flag & FEXTRA:
             # Read & discard the extra field, if present
-            xlen = ord(self.fileobj.read(1))
-            xlen = xlen + 256*ord(self.fileobj.read(1))
-            self.fileobj.read(xlen)
+            self._read_exact(struct.unpack("<H", self._read_exact(2)))
         if flag & FNAME:
             # Read and discard a null-terminated string containing the filename
             while True:
@@ -315,12 +316,13 @@ class GzipFile(io.BufferedIOBase):
                 if not s or s==b'\000':
                     break
         if flag & FHCRC:
-            self.fileobj.read(2) # Read & discard the 16-bit header CRC
+            self._read_exact(2) # Read & discard the 16-bit header CRC
 
         unused = self.fileobj.unused()
         if unused:
             uncompress = self.decompress.decompress(unused)
             self._add_read_data(uncompress)
+        return True
 
     def write(self,data):
         self._check_closed()
@@ -354,20 +356,16 @@ class GzipFile(io.BufferedIOBase):
 
         readsize = 1024
         if size < 0: # get the whole thing
-            try:
-                while True:
-                    self._read(readsize)
-                    readsize = min(self.max_read_chunk, readsize * 2)
-            except EOFError:
-                size = self.extrasize
+            while self._read(readsize):
+                readsize = min(self.max_read_chunk, readsize * 2)
+            size = self.extrasize
         else: # just get some more of it
-            try:
-                while size > self.extrasize:
-                    self._read(readsize)
-                    readsize = min(self.max_read_chunk, readsize * 2)
-            except EOFError:
-                if size > self.extrasize:
-                    size = self.extrasize
+            while size > self.extrasize:
+                if not self._read(readsize):
+                    if size > self.extrasize:
+                        size = self.extrasize
+                    break
+                readsize = min(self.max_read_chunk, readsize * 2)
 
         offset = self.offset - self.extrastart
         chunk = self.extrabuf[offset: offset + size]
@@ -385,12 +383,9 @@ class GzipFile(io.BufferedIOBase):
         if self.extrasize <= 0 and self.fileobj is None:
             return b''
 
-        try:
-            # For certain input data, a single call to _read() may not return
-            # any data. In this case, retry until we get some data or reach EOF.
-            while self.extrasize <= 0:
-                self._read()
-        except EOFError:
+        # For certain input data, a single call to _read() may not return
+        # any data. In this case, retry until we get some data or reach EOF.
+        while self.extrasize <= 0 and self._read():
             pass
         if size < 0 or size > self.extrasize:
             size = self.extrasize
@@ -413,12 +408,9 @@ class GzipFile(io.BufferedIOBase):
         if self.extrasize == 0:
             if self.fileobj is None:
                 return b''
-            try:
-                # Ensure that we don't return b"" if we haven't reached EOF.
-                while self.extrasize == 0:
-                    # 1024 is the same buffering heuristic used in read()
-                    self._read(max(n, 1024))
-            except EOFError:
+            # Ensure that we don't return b"" if we haven't reached EOF.
+            # 1024 is the same buffering heuristic used in read()
+            while self.extrasize == 0 and self._read(max(n, 1024)):
                 pass
         offset = self.offset - self.extrastart
         remaining = self.extrasize
@@ -431,13 +423,14 @@ class GzipFile(io.BufferedIOBase):
 
     def _read(self, size=1024):
         if self.fileobj is None:
-            raise EOFError("Reached EOF")
+            return False
 
         if self._new_member:
             # If the _new_member flag is set, we have to
             # jump to the next member, if there is one.
             self._init_read()
-            self._read_gzip_header()
+            if not self._read_gzip_header():
+                return False
             self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
             self._new_member = False
 
@@ -454,7 +447,7 @@ class GzipFile(io.BufferedIOBase):
             self.fileobj.prepend(self.decompress.unused_data, True)
             self._read_eof()
             self._add_read_data( uncompress )
-            raise EOFError('Reached EOF')
+            return False
 
         uncompress = self.decompress.decompress(buf)
         self._add_read_data( uncompress )
@@ -470,6 +463,7 @@ class GzipFile(io.BufferedIOBase):
             # a new member on the next call
             self._read_eof()
             self._new_member = True
+        return True
 
     def _add_read_data(self, data):
         self.crc = zlib.crc32(data, self.crc) & 0xffffffff
@@ -484,8 +478,7 @@ class GzipFile(io.BufferedIOBase):
         # We check the that the computed CRC and size of the
         # uncompressed data matches the stored values. Note that the size
         # stored is the true file size mod 2**32.
-        crc32 = read32(self.fileobj)
-        isize = read32(self.fileobj) # may exceed 2GB
+        crc32, isize = struct.unpack("<II", self._read_exact(8))
         if crc32 != self.crc:
             raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                          hex(self.crc)))
```