diff options
| -rw-r--r-- | Doc/library/gzip.rst | 11 | ||||
| -rw-r--r-- | Lib/gzip.py | 21 | ||||
| -rw-r--r-- | Lib/test/test_gzip.py | 63 | ||||
| -rw-r--r-- | Misc/NEWS | 5 | 
4 files changed, 95 insertions, 5 deletions
| diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index c6f9ef82bd..fa73bba458 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -24,7 +24,7 @@ For other archive formats, see the :mod:`bz2`, :mod:`zipfile`, and  The module defines the following items: -.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj]]]]) +.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj[, mtime]]]]])     Constructor for the :class:`GzipFile` class, which simulates most of the methods     of a file object, with the exception of the :meth:`readinto` and @@ -52,6 +52,15 @@ The module defines the following items:     level of compression; ``1`` is fastest and produces the least compression, and     ``9`` is slowest and produces the most compression.  The default is ``9``. +   The *mtime* argument is an optional numeric timestamp to be written to +   the stream when compressing.  All :program:`gzip` compressed streams are +   required to contain a timestamp.  If omitted or ``None``, the current +   time is used.  This module ignores the timestamp when decompressing; +   however, some programs, such as :program:`gunzip`\ , make use of it. +   The format of the timestamp is the same as that of the return value of +   ``time.time()`` and of the ``st_mtime`` member of the object returned +   by ``os.stat()``. +     Calling a :class:`GzipFile` object's :meth:`close` method does not close     *fileobj*, since you might wish to append more material after the compressed     data.  This also allows you to pass a :class:`StringIO` object opened for diff --git a/Lib/gzip.py b/Lib/gzip.py index 11d557172f..560a722bae 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -54,7 +54,7 @@ class GzipFile:      max_read_chunk = 10 * 1024 * 1024   # 10Mb      def __init__(self, filename=None, mode=None, -                 compresslevel=9, fileobj=None): +                 compresslevel=9, fileobj=None, mtime=None):          """Constructor for the GzipFile class.          
At least one of fileobj and filename must be given a @@ -81,6 +81,15 @@ class GzipFile:          level of compression; 1 is fastest and produces the least compression,          and 9 is slowest and produces the most compression.  The default is 9. +        The mtime argument is an optional numeric timestamp to be written +        to the stream when compressing.  All gzip compressed streams +        are required to contain a timestamp.  If omitted or None, the +        current time is used.  This module ignores the timestamp when +        decompressing; however, some programs, such as gunzip, make use +        of it.  The format of the timestamp is the same as that of the +        return value of time.time() and of the st_mtime member of the +        object returned by os.stat(). +          """          # guarantee the file is opened in binary mode on platforms @@ -119,6 +128,7 @@ class GzipFile:          self.fileobj = fileobj          self.offset = 0 +        self.mtime = mtime          if self.mode == WRITE:              self._write_gzip_header() @@ -157,7 +167,10 @@ class GzipFile:          if fname:              flags = FNAME          self.fileobj.write(chr(flags).encode('latin-1')) -        write32u(self.fileobj, int(time.time())) +        mtime = self.mtime +        if mtime is None: +            mtime = time.time() +        write32u(self.fileobj, int(mtime))          self.fileobj.write(b'\002')          self.fileobj.write(b'\377')          if fname: @@ -175,10 +188,10 @@ class GzipFile:          if method != 8:              raise IOError('Unknown compression method')          flag = ord( self.fileobj.read(1) ) -        # modtime = self.fileobj.read(4) +        self.mtime = read32(self.fileobj)          # extraflag = self.fileobj.read(1)          # os = self.fileobj.read(1) -        self.fileobj.read(6) +        self.fileobj.read(2)          if flag & FEXTRA:              # Read & discard the extra field, if present diff --git a/Lib/test/test_gzip.py 
b/Lib/test/test_gzip.py index d28c024d07..e758826fa7 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -6,6 +6,7 @@ import unittest  from test import support  import os  import gzip +import struct  data1 = b"""  int length=DEFAULTALLOC, err = Z_OK; @@ -160,6 +161,68 @@ class TestGzip(unittest.TestCase):              self.assertEqual(f.name, self.filename)              f.close() +    def test_mtime(self): +        mtime = 123456789 +        fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime) +        fWrite.write(data1) +        fWrite.close() + +        fRead = gzip.GzipFile(self.filename) +        dataRead = fRead.read() +        self.assertEqual(dataRead, data1) +        self.assert_(hasattr(fRead, 'mtime')) +        self.assertEqual(fRead.mtime, mtime) +        fRead.close() + +    def test_metadata(self): +        mtime = 123456789 + +        fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime) +        fWrite.write(data1) +        fWrite.close() + +        fRead = open(self.filename, 'rb') + +        # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html + +        idBytes = fRead.read(2) +        self.assertEqual(idBytes, b'\x1f\x8b') # gzip ID + +        cmByte = fRead.read(1) +        self.assertEqual(cmByte, b'\x08') # deflate + +        flagsByte = fRead.read(1) +        self.assertEqual(flagsByte, b'\x08') # only the FNAME flag is set + +        mtimeBytes = fRead.read(4) +        self.assertEqual(mtimeBytes, struct.pack('<i', mtime)) # little-endian + +        xflByte = fRead.read(1) +        self.assertEqual(xflByte, b'\x02') # maximum compression + +        osByte = fRead.read(1) +        self.assertEqual(osByte, b'\xff') # OS "unknown" (OS-independent) + +        # Since the FNAME flag is set, the zero-terminated filename follows. +        # RFC 1952 specifies that this is the name of the input file, if any. +        # However, the gzip module defaults to storing the name of the output +        # file in this field. 
+        expected = self.filename.encode('Latin-1') + b'\x00' +        nameBytes = fRead.read(len(expected)) +        self.assertEqual(nameBytes, expected) + +        # Since no other flags were set, the header ends here. +        # Rather than process the compressed data, let's seek to the trailer. +        fRead.seek(os.stat(self.filename).st_size - 8) + +        crc32Bytes = fRead.read(4) # CRC32 of uncompressed data [data1] +        self.assertEqual(crc32Bytes, b'\xaf\xd7d\x83') + +        isizeBytes = fRead.read(4) +        self.assertEqual(isizeBytes, struct.pack('<i', len(data1))) + +        fRead.close() +  def test_main(verbose=None):      support.run_unittest(TestGzip) @@ -82,6 +82,11 @@ Core and Builtins  Library  ------- +- Issue #4272: Add an optional argument to the GzipFile constructor to override +  the timestamp in the gzip stream. The default value remains the current time. +  The information can be used by e.g. gunzip when decompressing. Patch by +  Jacques Frechet. +  - Restore Python 2.3 compatibility for decimal.py.  - Issue #3638: Remove functions from _tkinter module level that depend on | 
