diff options
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- | Lib/zipfile.py | 102 |
1 files changed, 80 insertions, 22 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 8b99c1189b..5dc6516cc4 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -16,6 +16,8 @@ import struct import sys import threading import time +import contextlib +from collections import OrderedDict try: import zlib # We may need its compression method @@ -2182,6 +2184,79 @@ def _ancestry(path): path, tail = posixpath.split(path) +class CompleteDirs(ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. + """ + + @staticmethod + def _implied_dirs(names): + parents = itertools.chain.from_iterable(map(_parents, names)) + # Deduplicate entries in original order + implied_dirs = OrderedDict.fromkeys( + p + posixpath.sep for p in parents + # Cast names to a set for O(1) lookups + if p + posixpath.sep not in set(names) + ) + return implied_dirs + + def namelist(self): + names = super(CompleteDirs, self).namelist() + return names + list(self._implied_dirs(names)) + + def _name_set(self): + return set(self.namelist()) + + def resolve_dir(self, name): + """ + If the name represents a directory, return that name + as a directory (with the trailing slash). + """ + names = self._name_set() + dirname = name + '/' + dir_match = name not in names and dirname in names + return dirname if dir_match else name + + @classmethod + def make(cls, source): + """ + Given a source (filename or zipfile), return an + appropriate CompleteDirs subclass. + """ + if isinstance(source, CompleteDirs): + return source + + if not isinstance(source, ZipFile): + return cls(source) + + # Only allow for FastPath when supplied zipfile is read-only + if 'r' not in source.mode: + cls = CompleteDirs + + res = cls.__new__(cls) + vars(res).update(vars(source)) + return res + + +class FastLookup(CompleteDirs): + """ + ZipFile subclass to ensure implicit + dirs exist and are resolved rapidly. + """ + def namelist(self): + with contextlib.suppress(AttributeError): + return self.__names + self.__names = super(FastLookup, self).namelist() + return self.__names + + def _name_set(self): + with contextlib.suppress(AttributeError): + return self.__lookup + self.__lookup = super(FastLookup, self)._name_set() + return self.__lookup + + class Path: """ A pathlib-compatible interface for zip files. @@ -2250,7 +2325,7 @@ class Path: __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" def __init__(self, root, at=""): - self.root = root if isinstance(root, ZipFile) else ZipFile(root) + self.root = FastLookup.make(root) self.at = at @property @@ -2282,12 +2357,12 @@ class Path: return not self.is_dir() def exists(self): - return self.at in self._names() + return self.at in self.root._name_set() def iterdir(self): if not self.is_dir(): raise ValueError("Can't listdir a file") - subs = map(self._next, self._names()) + subs = map(self._next, self.root.namelist()) return filter(self._is_child, subs) def __str__(self): @@ -2298,25 +2373,10 @@ class Path: def joinpath(self, add): next = posixpath.join(self.at, add) - next_dir = posixpath.join(self.at, add, "") - names = self._names() - return self._next(next_dir if next not in names and next_dir in names else next) + return self._next(self.root.resolve_dir(next)) __truediv__ = joinpath - @staticmethod - def _implied_dirs(names): - return _unique_everseen( - parent + "/" - for name in names - for parent in _parents(name) - if parent + "/" not in names - ) - - @classmethod - def _add_implied_dirs(cls, names): - return names + list(cls._implied_dirs(names)) - @property def parent(self): parent_at = posixpath.dirname(self.at.rstrip('/')) @@ -2324,9 +2384,6 @@ class Path: parent_at += '/' return self._next(parent_at) - def _names(self): - return self._add_implied_dirs(self.root.namelist()) - def main(args=None): import argparse @@ -2388,5 +2445,6 @@ def main(args=None): zippath = '' addToZip(zf, path, zippath) + if __name__ == "__main__": main() |