diff options
Diffstat (limited to 'Lib/zipfile.py')
| -rw-r--r-- | Lib/zipfile.py | 444 | 
1 files changed, 251 insertions, 193 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 048f60a708..7f2b43ce1c 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -371,7 +371,7 @@ class ZipInfo (object):              result.append(' filemode=%r' % stat.filemode(hi))          if lo:              result.append(' external_attr=%#x' % lo) -        isdir = self.filename[-1:] == '/' +        isdir = self.is_dir()          if not isdir or self.file_size:              result.append(' file_size=%r' % self.file_size)          if ((not isdir or self.compress_size) and @@ -449,7 +449,7 @@ class ZipInfo (object):                  elif ln == 0:                      counts = ()                  else: -                    raise RuntimeError("Corrupt extra field %s"%(ln,)) +                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))                  idx = 0 @@ -469,6 +469,42 @@ class ZipInfo (object):              extra = extra[ln+4:] +    @classmethod +    def from_file(cls, filename, arcname=None): +        """Construct an appropriate ZipInfo for a file on the filesystem. + +        filename should be the path to a file or directory on the filesystem. + +        arcname is the name which it will have within the archive (by default, +        this will be the same as filename, but without a drive letter and with +        leading path separators removed). +        """ +        st = os.stat(filename) +        isdir = stat.S_ISDIR(st.st_mode) +        mtime = time.localtime(st.st_mtime) +        date_time = mtime[0:6] +        # Create ZipInfo instance to store file information +        if arcname is None: +            arcname = filename +        arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) +        while arcname[0] in (os.sep, os.altsep): +            arcname = arcname[1:] +        if isdir: +            arcname += '/' +        zinfo = cls(arcname, date_time) +        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes +        if isdir: +            zinfo.file_size = 0 +            zinfo.external_attr |= 0x10  # MS-DOS directory flag +        else: +            zinfo.file_size = st.st_size + +        return zinfo + +    def is_dir(self): +        """Return True if this archive member is a directory.""" +        return self.filename[-1] == '/' +  class _ZipDecrypter:      """Class to handle decryption of files stored within a ZIP archive. @@ -618,7 +654,7 @@ def _check_compression(compression):              raise RuntimeError(                  "Compression requires the (missing) lzma module")      else: -        raise RuntimeError("That compression method is not supported") +        raise NotImplementedError("That compression method is not supported")  def _get_compressor(compress_type): @@ -651,14 +687,19 @@ def _get_decompressor(compress_type):  class _SharedFile: -    def __init__(self, file, pos, close, lock): +    def __init__(self, file, pos, close, lock, writing):          self._file = file          self._pos = pos          self._close = close          self._lock = lock +        self._writing = writing      def read(self, n=-1):          with self._lock: +            if self._writing(): +                raise ValueError("Can't read from the ZIP file while there " +                        "is an open writing handle on it. " +                        "Close the writing handle before trying to read.")              self._file.seek(self._pos)              data = self._file.read(n)              self._pos = self._file.tell() @@ -702,9 +743,6 @@ class ZipExtFile(io.BufferedIOBase):      # Read from compressed files in 4k blocks.      MIN_READ_SIZE = 4096 -    # Search for universal newlines or line chunks. -    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') -      def __init__(self, fileobj, mode, zipinfo, decrypter=None,                   close_fileobj=False):          self._fileobj = fileobj @@ -721,7 +759,6 @@ class ZipExtFile(io.BufferedIOBase):          self._readbuffer = b''          self._offset = 0 -        self._universal = 'U' in mode          self.newlines = None          # Adjust read size for encrypted files since the first 12 bytes @@ -758,7 +795,7 @@ class ZipExtFile(io.BufferedIOBase):          If limit is specified, at most limit bytes will be read.          """ -        if not self._universal and limit < 0: +        if limit < 0:              # Shortcut common case - newline found in buffer.              i = self._readbuffer.find(b'\n', self._offset) + 1              if i > 0: @@ -766,41 +803,7 @@ class ZipExtFile(io.BufferedIOBase):                  self._offset = i                  return line -        if not self._universal: -            return io.BufferedIOBase.readline(self, limit) - -        line = b'' -        while limit < 0 or len(line) < limit: -            readahead = self.peek(2) -            if readahead == b'': -                return line - -            # -            # Search for universal newlines or line chunks. -            # -            # The pattern returns either a line chunk or a newline, but not -            # both. Combined with peek(2), we are assured that the sequence -            # '\r\n' is always retrieved completely and never split into -            # separate newlines - '\r', '\n' due to coincidental readaheads. -            # -            match = self.PATTERN.search(readahead) -            newline = match.group('newline') -            if newline is not None: -                if self.newlines is None: -                    self.newlines = [] -                if newline not in self.newlines: -                    self.newlines.append(newline) -                self._offset += len(newline) -                return line + b'\n' - -            chunk = match.group('chunk') -            if limit >= 0: -                chunk = chunk[: limit - len(line)] - -            self._offset += len(chunk) -            line += chunk - -        return line +        return io.BufferedIOBase.readline(self, limit)      def peek(self, n=1):          """Returns buffered bytes without advancing the position.""" @@ -958,6 +961,76 @@ class ZipExtFile(io.BufferedIOBase):              super().close() +class _ZipWriteFile(io.BufferedIOBase): +    def __init__(self, zf, zinfo, zip64): +        self._zinfo = zinfo +        self._zip64 = zip64 +        self._zipfile = zf +        self._compressor = _get_compressor(zinfo.compress_type) +        self._file_size = 0 +        self._compress_size = 0 +        self._crc = 0 + +    @property +    def _fileobj(self): +        return self._zipfile.fp + +    def writable(self): +        return True + +    def write(self, data): +        nbytes = len(data) +        self._file_size += nbytes +        self._crc = crc32(data, self._crc) +        if self._compressor: +            data = self._compressor.compress(data) +            self._compress_size += len(data) +        self._fileobj.write(data) +        return nbytes + +    def close(self): +        super().close() +        # Flush any data from the compressor, and update header info +        if self._compressor: +            buf = self._compressor.flush() +            self._compress_size += len(buf) +            self._fileobj.write(buf) +            self._zinfo.compress_size = self._compress_size +        else: +            self._zinfo.compress_size = self._file_size +        self._zinfo.CRC = self._crc +        self._zinfo.file_size = self._file_size + +        # Write updated header info +        if self._zinfo.flag_bits & 0x08: +            # Write CRC and file sizes after the file data +            fmt = '<LQQ' if self._zip64 else '<LLL' +            self._fileobj.write(struct.pack(fmt, self._zinfo.CRC, +                self._zinfo.compress_size, self._zinfo.file_size)) +            self._zipfile.start_dir = self._fileobj.tell() +        else: +            if not self._zip64: +                if self._file_size > ZIP64_LIMIT: +                    raise RuntimeError('File size unexpectedly exceeded ZIP64 ' +                                       'limit') +                if self._compress_size > ZIP64_LIMIT: +                    raise RuntimeError('Compressed size unexpectedly exceeded ' +                                       'ZIP64 limit') +            # Seek backwards and write file header (which will now include +            # correct CRC and file sizes) + +            # Preserve current position in file +            self._zipfile.start_dir = self._fileobj.tell() +            self._fileobj.seek(self._zinfo.header_offset) +            self._fileobj.write(self._zinfo.FileHeader(self._zip64)) +            self._fileobj.seek(self._zipfile.start_dir) + +        self._zipfile._writing = False + +        # Successfully written: Add file to our caches +        self._zipfile.filelist.append(self._zinfo) +        self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo +  class ZipFile:      """ Class with methods to open, read, write, close, list zip files. @@ -982,7 +1055,7 @@ class ZipFile:          """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',          or append 'a'."""          if mode not in ('r', 'w', 'x', 'a'): -            raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'") +            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")          _check_compression(compression) @@ -1020,6 +1093,7 @@ class ZipFile:          self._fileRefCnt = 1          self._lock = threading.RLock()          self._seekable = True +        self._writing = False          try:              if mode == 'r': @@ -1056,7 +1130,7 @@ class ZipFile:                      self._didModify = True                      self.start_dir = self._start_disk = self.fp.tell()              else: -                raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'") +                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")          except:              fp = self.fp              self.fp = None @@ -1205,7 +1279,7 @@ class ZipFile:      def setpassword(self, pwd):          """Set default password for encrypted files."""          if pwd and not isinstance(pwd, bytes): -            raise TypeError("pwd: expected bytes, got %s" % type(pwd)) +            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)          if pwd:              self.pwd = pwd          else: @@ -1219,7 +1293,7 @@ class ZipFile:      @comment.setter      def comment(self, comment):          if not isinstance(comment, bytes): -            raise TypeError("comment: expected bytes, got %s" % type(comment)) +            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)          # check for valid comment length          if len(comment) > ZIP_MAX_COMMENT:              import warnings @@ -1234,30 +1308,55 @@ class ZipFile:          with self.open(name, "r", pwd) as fp:              return fp.read() -    def open(self, name, mode="r", pwd=None): -        """Return file-like object for 'name'.""" -        if mode not in ("r", "U", "rU"): -            raise RuntimeError('open() requires mode "r", "U", or "rU"') -        if 'U' in mode: -            import warnings -            warnings.warn("'U' mode is deprecated", -                          DeprecationWarning, 2) +    def open(self, name, mode="r", pwd=None, *, force_zip64=False): +        """Return file-like object for 'name'. + +        name is a string for the file name within the ZIP file, or a ZipInfo +        object. + +        mode should be 'r' to read a file already in the ZIP file, or 'w' to +        write to a file newly added to the archive. + +        pwd is the password to decrypt files (only used for reading). + +        When writing, if the file size is not known in advance but may exceed +        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large +        files.  If the size is known in advance, it is best to pass a ZipInfo +        instance for name, with zinfo.file_size set. +        """ +        if mode not in {"r", "w"}: +            raise ValueError('open() requires mode "r" or "w"')          if pwd and not isinstance(pwd, bytes): -            raise TypeError("pwd: expected bytes, got %s" % type(pwd)) +            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) +        if pwd and (mode == "w"): +            raise ValueError("pwd is only supported for reading files")          if not self.fp: -            raise RuntimeError( -                "Attempt to read ZIP archive that was already closed") +            raise ValueError( +                "Attempt to use ZIP archive that was already closed")          # Make sure we have an info object          if isinstance(name, ZipInfo):              # 'name' is already an info object              zinfo = name +        elif mode == 'w': +            zinfo = ZipInfo(name) +            zinfo.compress_type = self.compression          else:              # Get info object for name              zinfo = self.getinfo(name) +        if mode == 'w': +            return self._open_to_write(zinfo, force_zip64=force_zip64) + +        if self._writing: +            raise ValueError("Can't read from the ZIP file while there " +                    "is an open writing handle on it. " +                    "Close the writing handle before trying to read.") + +        # Open for reading:          self._fileRefCnt += 1 -        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock) +        zef_file = _SharedFile(self.fp, zinfo.header_offset, +                               self._fpclose, self._lock, lambda: self._writing)          try:              # Skip the file header:              fheader = zef_file.read(sizeFileHeader) @@ -1297,7 +1396,7 @@ class ZipFile:                  if not pwd:                      pwd = self.pwd                  if not pwd: -                    raise RuntimeError("File %s is encrypted, password " +                    raise RuntimeError("File %r is encrypted, password "                                         "required for extraction" % name)                  zd = _ZipDecrypter(pwd) @@ -1315,13 +1414,56 @@ class ZipFile:                      # compare against the CRC otherwise                      check_byte = (zinfo.CRC >> 24) & 0xff                  if h[11] != check_byte: -                    raise RuntimeError("Bad password for file", name) +                    raise RuntimeError("Bad password for file %r" % name)              return ZipExtFile(zef_file, mode, zinfo, zd, True)          except:              zef_file.close()              raise +    def _open_to_write(self, zinfo, force_zip64=False): +        if force_zip64 and not self._allowZip64: +            raise ValueError( +                "force_zip64 is True, but allowZip64 was False when opening " +                "the ZIP file." +            ) +        if self._writing: +            raise ValueError("Can't write to the ZIP file while there is " +                             "another write handle open on it. " +                             "Close the first handle before opening another.") + +        # Sizes and CRC are overwritten with correct data after processing the file +        if not hasattr(zinfo, 'file_size'): +            zinfo.file_size = 0 +        zinfo.compress_size = 0 +        zinfo.CRC = 0 + +        zinfo.flag_bits = 0x00 +        if zinfo.compress_type == ZIP_LZMA: +            # Compressed data includes an end-of-stream (EOS) marker +            zinfo.flag_bits |= 0x02 +        if not self._seekable: +            zinfo.flag_bits |= 0x08 + +        if not zinfo.external_attr: +            zinfo.external_attr = 0o600 << 16  # permissions: ?rw------- + +        # Compressed size can be larger than uncompressed size +        zip64 = self._allowZip64 and \ +                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) + +        if self._seekable: +            self.fp.seek(self.start_dir) +        zinfo.header_offset = self.fp.tell() + +        self._writecheck(zinfo) +        self._didModify = True + +        self.fp.write(zinfo.FileHeader(zip64)) + +        self._writing = True +        return _ZipWriteFile(self, zinfo, zip64) +      def extract(self, member, path=None, pwd=None):          """Extract a member from the archive to the current working directory,             using its full name. Its file information is extracted as accurately @@ -1391,7 +1533,7 @@ class ZipFile:          if upperdirs and not os.path.exists(upperdirs):              os.makedirs(upperdirs) -        if member.filename[-1] == '/': +        if member.is_dir():              if not os.path.isdir(targetpath):                  os.mkdir(targetpath)              return targetpath @@ -1408,9 +1550,9 @@ class ZipFile:              import warnings              warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)          if self.mode not in ('w', 'x', 'a'): -            raise RuntimeError("write() requires mode 'w', 'x', or 'a'") +            raise ValueError("write() requires mode 'w', 'x', or 'a'")          if not self.fp: -            raise RuntimeError( +            raise ValueError(                  "Attempt to write ZIP archive that was already closed")          _check_compression(zinfo.compress_type)          if not self._allowZip64: @@ -1429,105 +1571,43 @@ class ZipFile:          """Put the bytes from filename into the archive under the name          arcname."""          if not self.fp: -            raise RuntimeError( +            raise ValueError(                  "Attempt to write to ZIP archive that was already closed") +        if self._writing: +            raise ValueError( +                "Can't write to ZIP archive while an open writing handle exists" +            ) -        st = os.stat(filename) -        isdir = stat.S_ISDIR(st.st_mode) -        mtime = time.localtime(st.st_mtime) -        date_time = mtime[0:6] -        # Create ZipInfo instance to store file information -        if arcname is None: -            arcname = filename -        arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) -        while arcname[0] in (os.sep, os.altsep): -            arcname = arcname[1:] -        if isdir: -            arcname += '/' -        zinfo = ZipInfo(arcname, date_time) -        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes -        if isdir: -            zinfo.compress_type = ZIP_STORED -        elif compress_type is None: -            zinfo.compress_type = self.compression +        zinfo = ZipInfo.from_file(filename, arcname) + +        if zinfo.is_dir(): +            zinfo.compress_size = 0 +            zinfo.CRC = 0          else: -            zinfo.compress_type = compress_type +            if compress_type is not None: +                zinfo.compress_type = compress_type +            else: +                zinfo.compress_type = self.compression -        zinfo.file_size = st.st_size -        zinfo.flag_bits = 0x00 -        with self._lock: -            if self._seekable: -                self.fp.seek(self.start_dir) -            zinfo.header_offset = self.fp.tell()    # Start of header bytes -            if zinfo.compress_type == ZIP_LZMA: +        if zinfo.is_dir(): +            with self._lock: +                if self._seekable: +                    self.fp.seek(self.start_dir) +                zinfo.header_offset = self.fp.tell()  # Start of header bytes +                if zinfo.compress_type == ZIP_LZMA:                  # Compressed data includes an end-of-stream (EOS) marker -                zinfo.flag_bits |= 0x02 +                    zinfo.flag_bits |= 0x02 -            self._writecheck(zinfo) -            self._didModify = True +                self._writecheck(zinfo) +                self._didModify = True -            if isdir: -                zinfo.file_size = 0 -                zinfo.compress_size = 0 -                zinfo.CRC = 0 -                zinfo.external_attr |= 0x10  # MS-DOS directory flag                  self.filelist.append(zinfo)                  self.NameToInfo[zinfo.filename] = zinfo                  self.fp.write(zinfo.FileHeader(False))                  self.start_dir = self.fp.tell() -                return - -            cmpr = _get_compressor(zinfo.compress_type) -            if not self._seekable: -                zinfo.flag_bits |= 0x08 -            with open(filename, "rb") as fp: -                # Must overwrite CRC and sizes with correct data later -                zinfo.CRC = CRC = 0 -                zinfo.compress_size = compress_size = 0 -                # Compressed size can be larger than uncompressed size -                zip64 = self._allowZip64 and \ -                    zinfo.file_size * 1.05 > ZIP64_LIMIT -                self.fp.write(zinfo.FileHeader(zip64)) -                file_size = 0 -                while 1: -                    buf = fp.read(1024 * 8) -                    if not buf: -                        break -                    file_size = file_size + len(buf) -                    CRC = crc32(buf, CRC) -                    if cmpr: -                        buf = cmpr.compress(buf) -                        compress_size = compress_size + len(buf) -                    self.fp.write(buf) -            if cmpr: -                buf = cmpr.flush() -                compress_size = compress_size + len(buf) -                self.fp.write(buf) -                zinfo.compress_size = compress_size -            else: -                zinfo.compress_size = file_size -            zinfo.CRC = CRC -            zinfo.file_size = file_size -            if zinfo.flag_bits & 0x08: -                # Write CRC and file sizes after the file data -                fmt = '<LQQ' if zip64 else '<LLL' -                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, -                                          zinfo.file_size)) -                self.start_dir = self.fp.tell() -            else: -                if not zip64 and self._allowZip64: -                    if file_size > ZIP64_LIMIT: -                        raise RuntimeError('File size has increased during compressing') -                    if compress_size > ZIP64_LIMIT: -                        raise RuntimeError('Compressed size larger than uncompressed size') -                # Seek backwards and write file header (which will now include -                # correct CRC and file sizes) -                self.start_dir = self.fp.tell() # Preserve current position in file -                self.fp.seek(zinfo.header_offset) -                self.fp.write(zinfo.FileHeader(zip64)) -                self.fp.seek(self.start_dir) -            self.filelist.append(zinfo) -            self.NameToInfo[zinfo.filename] = zinfo +        else: +            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: +                shutil.copyfileobj(src, dest, 1024*8)      def writestr(self, zinfo_or_arcname, data, compress_type=None):          """Write a file into the archive.  The contents is 'data', which @@ -1550,47 +1630,20 @@ class ZipFile:              zinfo = zinfo_or_arcname          if not self.fp: -            raise RuntimeError( +            raise ValueError(                  "Attempt to write to ZIP archive that was already closed") +        if self._writing: +            raise ValueError( +                "Can't write to ZIP archive while an open writing handle exists." +            ) + +        if compress_type is not None: +            zinfo.compress_type = compress_type          zinfo.file_size = len(data)            # Uncompressed size          with self._lock: -            if self._seekable: -                self.fp.seek(self.start_dir) -            zinfo.header_offset = self.fp.tell()    # Start of header data -            if compress_type is not None: -                zinfo.compress_type = compress_type -            zinfo.header_offset = self.fp.tell()    # Start of header data -            if compress_type is not None: -                zinfo.compress_type = compress_type -            if zinfo.compress_type == ZIP_LZMA: -                # Compressed data includes an end-of-stream (EOS) marker -                zinfo.flag_bits |= 0x02 - -            self._writecheck(zinfo) -            self._didModify = True -            zinfo.CRC = crc32(data)       # CRC-32 checksum -            co = _get_compressor(zinfo.compress_type) -            if co: -                data = co.compress(data) + co.flush() -                zinfo.compress_size = len(data)    # Compressed size -            else: -                zinfo.compress_size = zinfo.file_size -            zip64 = zinfo.file_size > ZIP64_LIMIT or \ -                zinfo.compress_size > ZIP64_LIMIT -            if zip64 and not self._allowZip64: -                raise LargeZipFile("Filesize would require ZIP64 extensions") -            self.fp.write(zinfo.FileHeader(zip64)) -            self.fp.write(data) -            if zinfo.flag_bits & 0x08: -                # Write CRC and file sizes after the file data -                fmt = '<LQQ' if zip64 else '<LLL' -                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, -                                          zinfo.file_size)) -            self.fp.flush() -            self.start_dir = self.fp.tell() -            self.filelist.append(zinfo) -            self.NameToInfo[zinfo.filename] = zinfo +            with self.open(zinfo, mode='w') as dest: +                dest.write(data)      def __del__(self):          """Call the "close()" method in case the user forgot.""" @@ -1602,6 +1655,11 @@ class ZipFile:          if self.fp is None:              return +        if self._writing: +            raise ValueError("Can't close the ZIP file while there is " +                             "an open writing handle on it. " +                             "Close the writing handle before closing the zip.") +          try:              if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records                  with self._lock: @@ -1746,7 +1804,7 @@ class PyZipFile(ZipFile):          if filterfunc and not filterfunc(pathname):              if self.debug:                  label = 'path' if os.path.isdir(pathname) else 'file' -                print('%s "%s" skipped by filterfunc' % (label, pathname)) +                print('%s %r skipped by filterfunc' % (label, pathname))              return          dir, name = os.path.split(pathname)          if os.path.isdir(pathname): @@ -1777,7 +1835,7 @@ class PyZipFile(ZipFile):                      elif ext == ".py":                          if filterfunc and not filterfunc(path):                              if self.debug: -                                print('file "%s" skipped by filterfunc' % path) +                                print('file %r skipped by filterfunc' % path)                              continue                          fname, arcname = self._get_codename(path[0:-3],                                                              basename) @@ -1794,7 +1852,7 @@ class PyZipFile(ZipFile):                      if ext == ".py":                          if filterfunc and not filterfunc(path):                              if self.debug: -                                print('file "%s" skipped by filterfunc' % path) +                                print('file %r skipped by filterfunc' % path)                              continue                          fname, arcname = self._get_codename(path[0:-3],                                                              basename)  | 
