diff options
author | Barry Warsaw <barry@python.org> | 2011-05-23 15:29:24 -0400 |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2011-05-23 15:29:24 -0400 |
commit | 544fb3c1aea093f3be6e0142679c0f01ab316758 (patch) | |
tree | ad1b5c56a1cd7de261e35819133502b4180278f0 /Lib/gzip.py | |
parent | 51228dcd1c23959cdd68712c8b2507bed4effccf (diff) | |
parent | b5e277ca522d94bdbf1426bfd8c43fee2f8153f3 (diff) | |
download | cpython-544fb3c1aea093f3be6e0142679c0f01ab316758.tar.gz |
Null merge from 2.6 branch.
Diffstat (limited to 'Lib/gzip.py')
-rw-r--r-- | Lib/gzip.py | 135 |
1 files changed, 80 insertions, 55 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py index fc21ad4475..2bcb4dbfb0 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -5,8 +5,9 @@ but random access is not allowed.""" # based on Andrew Kuchling's minigzip.py distributed with the zlib module -import struct, sys, time +import struct, sys, time, os import zlib +import io import __builtin__ __all__ = ["GzipFile","open"] @@ -32,7 +33,7 @@ def open(filename, mode="rb", compresslevel=9): """ return GzipFile(filename, mode, compresslevel) -class GzipFile: +class GzipFile(io.BufferedIOBase): """The GzipFile class simulates most of the methods of a file object with the exception of the readinto() and truncate() methods. @@ -42,7 +43,7 @@ class GzipFile: max_read_chunk = 10 * 1024 * 1024 # 10Mb def __init__(self, filename=None, mode=None, - compresslevel=9, fileobj=None): + compresslevel=9, fileobj=None, mtime=None): """Constructor for the GzipFile class. At least one of fileobj and filename must be given a @@ -69,6 +70,15 @@ class GzipFile: level of compression; 1 is fastest and produces the least compression, and 9 is slowest and produces the most compression. The default is 9. + The mtime argument is an optional numeric timestamp to be written + to the stream when compressing. All gzip compressed streams + are required to contain a timestamp. If omitted or None, the + current time is used. This module ignores the timestamp when + decompressing; however, some programs, such as gunzip, make use + of it. The format of the timestamp is the same as that of the + return value of time.time() and of the st_mtime member of the + object returned by os.stat(). + """ # guarantee the file is opened in binary mode on platforms @@ -88,8 +98,12 @@ class GzipFile: self.mode = READ # Set flag indicating start of a new member self._new_member = True + # Buffer data read from gzip file. extrastart is offset in + # stream where buffer starts. extrasize is number of + # bytes remaining in buffer from current stream position. self.extrabuf = "" self.extrasize = 0 + self.extrastart = 0 self.name = filename # Starts small, scales exponentially self.min_readsize = 100 @@ -107,6 +121,7 @@ class GzipFile: self.fileobj = fileobj self.offset = 0 + self.mtime = mtime if self.mode == WRITE: self._write_gzip_header() @@ -123,6 +138,13 @@ class GzipFile: s = repr(self.fileobj) return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>' + def _check_closed(self): + """Raises a ValueError if the underlying file object has been closed. + + """ + if self.closed: + raise ValueError('I/O operation on closed file.') + def _init_write(self, filename): self.name = filename self.crc = zlib.crc32("") & 0xffffffffL @@ -133,14 +155,17 @@ class GzipFile: def _write_gzip_header(self): self.fileobj.write('\037\213') # magic header self.fileobj.write('\010') # compression method - fname = self.name + fname = os.path.basename(self.name) if fname.endswith(".gz"): fname = fname[:-3] flags = 0 if fname: flags = FNAME self.fileobj.write(chr(flags)) - write32u(self.fileobj, long(time.time())) + mtime = self.mtime + if mtime is None: + mtime = time.time() + write32u(self.fileobj, long(mtime)) self.fileobj.write('\002') self.fileobj.write('\377') if fname: @@ -158,10 +183,10 @@ class GzipFile: if method != 8: raise IOError, 'Unknown compression method' flag = ord( self.fileobj.read(1) ) - # modtime = self.fileobj.read(4) + self.mtime = read32(self.fileobj) # extraflag = self.fileobj.read(1) # os = self.fileobj.read(1) - self.fileobj.read(6) + self.fileobj.read(2) if flag & FEXTRA: # Read & discard the extra field, if present @@ -183,21 +208,29 @@ class GzipFile: if flag & FHCRC: self.fileobj.read(2) # Read & discard the 16-bit header CRC - def write(self,data): + self._check_closed() if self.mode != WRITE: import errno raise IOError(errno.EBADF, "write() on read-only GzipFile object") if self.fileobj is None: raise ValueError, "write() on closed GzipFile object" + + # Convert data type if called by io.BufferedWriter. + if isinstance(data, memoryview): + data = data.tobytes() + if len(data) > 0: self.size = self.size + len(data) self.crc = zlib.crc32(data, self.crc) & 0xffffffffL self.fileobj.write( self.compress.compress(data) ) self.offset += len(data) + return len(data) + def read(self, size=-1): + self._check_closed() if self.mode != READ: import errno raise IOError(errno.EBADF, "read() on write-only GzipFile object") @@ -222,15 +255,14 @@ class GzipFile: if size > self.extrasize: size = self.extrasize - chunk = self.extrabuf[:size] - self.extrabuf = self.extrabuf[size:] + offset = self.offset - self.extrastart + chunk = self.extrabuf[offset: offset + size] self.extrasize = self.extrasize - size self.offset += size return chunk def _unread(self, buf): - self.extrabuf = buf + self.extrabuf self.extrasize = len(buf) + self.extrasize self.offset -= len(buf) @@ -286,8 +318,10 @@ class GzipFile: def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffffL - self.extrabuf = self.extrabuf + data + offset = self.offset - self.extrastart + self.extrabuf = self.extrabuf[offset:] + data self.extrasize = self.extrasize + len(data) + self.extrastart = self.offset self.size = self.size + len(data) def _read_eof(self): @@ -305,6 +339,19 @@ class GzipFile: elif isize != (self.size & 0xffffffffL): raise IOError, "Incorrect length of data produced" + # Gzip files can be padded with zeroes and still have archives. + # Consume all zero bytes and set the file position to the first + # non-zero byte. See http://www.gzip.org/#faq8 + c = "\x00" + while c == "\x00": + c = self.fileobj.read(1) + if c: + self.fileobj.seek(-1, 1) + + @property + def closed(self): + return self.fileobj is None + def close(self): if self.fileobj is None: return @@ -320,16 +367,8 @@ class GzipFile: self.myfileobj.close() self.myfileobj = None - def __del__(self): - try: - if (self.myfileobj is None and - self.fileobj is None): - return - except AttributeError: - return - self.close() - def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): + self._check_closed() if self.mode == WRITE: # Ensure the compressor's buffer is flushed self.fileobj.write(self.compress.flush(zlib_mode)) @@ -343,12 +382,6 @@ class GzipFile: """ return self.fileobj.fileno() - def isatty(self): - return False - - def tell(self): - return self.offset - def rewind(self): '''Return the uncompressed stream file position indicator to the beginning of the file''' @@ -358,8 +391,18 @@ class GzipFile: self._new_member = True self.extrabuf = "" self.extrasize = 0 + self.extrastart = 0 self.offset = 0 + def readable(self): + return self.mode == READ + + def writable(self): + return self.mode == WRITE + + def seekable(self): + return True + def seek(self, offset, whence=0): if whence: if whence == 1: @@ -382,8 +425,18 @@ class GzipFile: self.read(1024) self.read(count % 1024) + return self.offset + def readline(self, size=-1): if size < 0: + # Shortcut common case - newline found in buffer. + offset = self.offset - self.extrastart + i = self.extrabuf.find('\n', offset) + 1 + if i > 0: + self.extrasize -= i - offset + self.offset += i - offset + return self.extrabuf[offset: i] + size = sys.maxint readsize = self.min_readsize else: @@ -413,34 +466,6 @@ class GzipFile: self.min_readsize = min(readsize, self.min_readsize * 2, 512) return ''.join(bufs) # Return resulting line - def readlines(self, sizehint=0): - # Negative numbers result in reading all the lines - if sizehint <= 0: - sizehint = sys.maxint - L = [] - while sizehint > 0: - line = self.readline() - if line == "": - break - L.append(line) - sizehint = sizehint - len(line) - - return L - - def writelines(self, L): - for line in L: - self.write(line) - - def __iter__(self): - return self - - def next(self): - line = self.readline() - if line: - return line - else: - raise StopIteration - def _test(): # Act like gzip; with -d, act like gunzip. |