From bb45468d27615c2ce9f9c9757a367c44d6ee80d2 Mon Sep 17 00:00:00 2001
From: Tim Heap
Date: Tue, 16 Aug 2016 16:40:10 +1000
Subject: Much faster implementation of FileList, for big egg_info speedups
---
 setuptools/glob.py | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 setuptools/glob.py
(limited to 'setuptools/glob.py')
diff --git a/setuptools/glob.py b/setuptools/glob.py
new file mode 100644
index 00000000..f51b9c83
--- /dev/null
+++ b/setuptools/glob.py
@@ -0,0 +1,165 @@
+"""
+Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
+
+Changes include:
+ * `yield from` and PEP3102 `*` removed.
+ * `bytes` changed to `six.binary_type`.
+ * Hidden files are not ignored.
+"""
+
+import os
+import re
+import fnmatch
+from setuptools.extern.six import binary_type
+
+__all__ = ["glob", "iglob", "escape"]
+
+def glob(pathname, recursive=False):
+    """Return a list of paths matching a pathname pattern.
+
+    The pattern may contain simple shell-style wildcards a la
+    fnmatch. However, unlike fnmatch, filenames starting with a
+    dot are special cases that are not matched by '*' and '?'
+    patterns.
+
+    If recursive is true, the pattern '**' will match any files and
+    zero or more directories and subdirectories.
+    """
+    return list(iglob(pathname, recursive=recursive))
+
+def iglob(pathname, recursive=False):
+    """Return an iterator which yields the paths matching a pathname pattern.
+
+    The pattern may contain simple shell-style wildcards a la
+    fnmatch. However, unlike fnmatch, filenames starting with a
+    dot are special cases that are not matched by '*' and '?'
+    patterns.
+
+    If recursive is true, the pattern '**' will match any files and
+    zero or more directories and subdirectories.
+    """
+    it = _iglob(pathname, recursive)
+    if recursive and _isrecursive(pathname):
+        s = next(it) # skip empty string
+        assert not s
+    return it
+
+def _iglob(pathname, recursive):
+    dirname, basename = os.path.split(pathname)
+    if not has_magic(pathname):
+        if basename:
+            if os.path.lexists(pathname):
+                yield pathname
+        else:
+            # Patterns ending with a slash should match only directories
+            if os.path.isdir(dirname):
+                yield pathname
+        return
+    if not dirname:
+        if recursive and _isrecursive(basename):
+            for x in glob2(dirname, basename):
+                yield x
+        else:
+            for x in glob1(dirname, basename):
+                yield x
+        return
+    # `os.path.split()` returns the argument itself as a dirname if it is a
+    # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
+    # contains magic characters (i.e. r'\\?\C:').
+    if dirname != pathname and has_magic(dirname):
+        dirs = _iglob(dirname, recursive)
+    else:
+        dirs = [dirname]
+    if has_magic(basename):
+        if recursive and _isrecursive(basename):
+            glob_in_dir = glob2
+        else:
+            glob_in_dir = glob1
+    else:
+        glob_in_dir = glob0
+    for dirname in dirs:
+        for name in glob_in_dir(dirname, basename):
+            yield os.path.join(dirname, name)
+
+# These 2 helper functions non-recursively glob inside a literal directory.
+# They return a list of basenames. `glob1` accepts a pattern while `glob0`
+# takes a literal basename (so it only has to check for its existence).
+
+def glob1(dirname, pattern):
+    if not dirname:
+        if isinstance(pattern, binary_type):
+            dirname = os.curdir.encode('ASCII')
+        else:
+            dirname = os.curdir
+    try:
+        names = os.listdir(dirname)
+    except OSError:
+        return []
+    return fnmatch.filter(names, pattern)
+
+def glob0(dirname, basename):
+    if not basename:
+        # `os.path.split()` returns an empty basename for paths ending with a
+        # directory separator. 'q*x/' should match only directories.
+        if os.path.isdir(dirname):
+            return [basename]
+    else:
+        if os.path.lexists(os.path.join(dirname, basename)):
+            return [basename]
+    return []
+
+# This helper function recursively yields relative pathnames inside a literal
+# directory.
+
+def glob2(dirname, pattern):
+    assert _isrecursive(pattern)
+    yield pattern[:0]
+    for x in _rlistdir(dirname):
+        yield x
+
+
+# Recursively yields relative pathnames inside a literal directory.
+def _rlistdir(dirname):
+    if not dirname:
+        if isinstance(dirname, binary_type):
+            dirname = binary_type(os.curdir, 'ASCII')
+        else:
+            dirname = os.curdir
+    try:
+        names = os.listdir(dirname)
+    except os.error:
+        return
+    for x in names:
+        yield x
+        path = os.path.join(dirname, x) if dirname else x
+        for y in _rlistdir(path):
+            yield os.path.join(x, y)
+
+
+magic_check = re.compile('([*?[])')
+magic_check_bytes = re.compile(b'([*?[])')
+
+def has_magic(s):
+    if isinstance(s, binary_type):
+        match = magic_check_bytes.search(s)
+    else:
+        match = magic_check.search(s)
+    return match is not None
+
+def _isrecursive(pattern):
+    if isinstance(pattern, binary_type):
+        return pattern == b'**'
+    else:
+        return pattern == '**'
+
+def escape(pathname):
+    """Escape all special characters.
+    """
+    # Escaping is done by wrapping any of "*?[" between square brackets.
+    # Metacharacters do not work in the drive part and shouldn't be escaped.
+    drive, pathname = os.path.splitdrive(pathname)
+    if isinstance(pathname, binary_type):
+        pathname = magic_check_bytes.sub(br'[\1]', pathname)
+    else:
+        pathname = magic_check.sub(r'[\1]', pathname)
+    return drive + pathname
-- 
cgit v1.2.1

From 31bd37c6ac8de9e8c1bacebc2d8e1215df91eb96 Mon Sep 17 00:00:00 2001
From: stepshal
Date: Tue, 18 Oct 2016 20:24:35 +0700
Subject: Fix quantity of blank lines.
---
 setuptools/glob.py | 11 +++++++++++
 1 file changed, 11 insertions(+)
(limited to 'setuptools/glob.py')
diff --git a/setuptools/glob.py b/setuptools/glob.py
index f51b9c83..6c781de3 100644
--- a/setuptools/glob.py
+++ b/setuptools/glob.py
@@ -14,6 +14,7 @@ from setuptools.extern.six import binary_type
 
 __all__ = ["glob", "iglob", "escape"]
 
+
 def glob(pathname, recursive=False):
     """Return a list of paths matching a pathname pattern.
 
@@ -27,6 +28,7 @@ def glob(pathname, recursive=False):
     """
     return list(iglob(pathname, recursive=recursive))
 
+
 def iglob(pathname, recursive=False):
     """Return an iterator which yields the paths matching a pathname pattern.
 
@@ -44,6 +46,7 @@ def iglob(pathname, recursive=False):
         assert not s
     return it
 
+
 def _iglob(pathname, recursive):
     dirname, basename = os.path.split(pathname)
     if not has_magic(pathname):
@@ -81,10 +84,12 @@ def _iglob(pathname, recursive):
         for name in glob_in_dir(dirname, basename):
             yield os.path.join(dirname, name)
 
+
 # These 2 helper functions non-recursively glob inside a literal directory.
 # They return a list of basenames. `glob1` accepts a pattern while `glob0`
 # takes a literal basename (so it only has to check for its existence).
 
+
 def glob1(dirname, pattern):
     if not dirname:
         if isinstance(pattern, binary_type):
@@ -97,6 +102,7 @@ def glob1(dirname, pattern):
         return []
     return fnmatch.filter(names, pattern)
 
+
 def glob0(dirname, basename):
     if not basename:
         # `os.path.split()` returns an empty basename for paths ending with a
@@ -108,9 +114,11 @@ def glob0(dirname, basename):
             return [basename]
     return []
 
+
 # This helper function recursively yields relative pathnames inside a literal
 # directory.
 
+
 def glob2(dirname, pattern):
     assert _isrecursive(pattern)
     yield pattern[:0]
@@ -139,6 +147,7 @@ def _rlistdir(dirname):
 magic_check = re.compile('([*?[])')
 magic_check_bytes = re.compile(b'([*?[])')
 
+
 def has_magic(s):
     if isinstance(s, binary_type):
         match = magic_check_bytes.search(s)
@@ -146,12 +155,14 @@ def has_magic(s):
         match = magic_check.search(s)
     return match is not None
 
+
 def _isrecursive(pattern):
     if isinstance(pattern, binary_type):
         return pattern == b'**'
     else:
         return pattern == '**'
 
+
 def escape(pathname):
     """Escape all special characters.
     """
-- 
cgit v1.2.1

From ff371f18f0076bc63da05334f7e551c1cc29e10d Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs"
Date: Sun, 1 Jan 2017 22:34:28 -0500
Subject: Strip out vendored packages and require them instead. Ref #581.
---
 setuptools/glob.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'setuptools/glob.py')
diff --git a/setuptools/glob.py b/setuptools/glob.py
index 6c781de3..f2644026 100644
--- a/setuptools/glob.py
+++ b/setuptools/glob.py
@@ -10,7 +10,7 @@ Changes include:
 import os
 import re
 import fnmatch
-from setuptools.extern.six import binary_type
+from six import binary_type
 
 __all__ = ["glob", "iglob", "escape"]
 
-- 
cgit v1.2.1

From 3d0cc355fb5e8012cb8c72f0e25042a5a44f31d6 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs"
Date: Fri, 24 Feb 2017 11:49:51 -0500
Subject: Revert "Merge pull request #933 from pypa/feature/581-depend-not-bundle"

This reverts commit 089cdeb489a0fa94d11b7307b54210ef9aa40511, reversing changes made to aaec654d804cb78dbb6391afff721a63f26a71cd.
---
 setuptools/glob.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'setuptools/glob.py')
diff --git a/setuptools/glob.py b/setuptools/glob.py
index f2644026..6c781de3 100644
--- a/setuptools/glob.py
+++ b/setuptools/glob.py
@@ -10,7 +10,7 @@ Changes include:
 import os
 import re
 import fnmatch
-from six import binary_type
+from setuptools.extern.six import binary_type
 
 __all__ = ["glob", "iglob", "escape"]
 
-- 
cgit v1.2.1

From 4f165ed9d35ea7e37823bec25ed822338387c0be Mon Sep 17 00:00:00 2001
From: Jon Dufresne
Date: Fri, 14 Sep 2018 05:56:37 -0700
Subject: Remove use of compatibility shim six.binary_type

The type bytes is available on all supported Pythons. Makes the code more forward compatible with Python 3.
---
 setuptools/glob.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)
(limited to 'setuptools/glob.py')
diff --git a/setuptools/glob.py b/setuptools/glob.py
index 6c781de3..9d7cbc5d 100644
--- a/setuptools/glob.py
+++ b/setuptools/glob.py
@@ -3,14 +3,12 @@ Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
 
 Changes include:
  * `yield from` and PEP3102 `*` removed.
- * `bytes` changed to `six.binary_type`.
  * Hidden files are not ignored.
 """
 
 import os
 import re
 import fnmatch
-from setuptools.extern.six import binary_type
 
 __all__ = ["glob", "iglob", "escape"]
 
@@ -92,7 +90,7 @@ def _iglob(pathname, recursive):
 
 def glob1(dirname, pattern):
     if not dirname:
-        if isinstance(pattern, binary_type):
+        if isinstance(pattern, bytes):
             dirname = os.curdir.encode('ASCII')
         else:
             dirname = os.curdir
@@ -129,8 +127,8 @@ def glob2(dirname, pattern):
 # Recursively yields relative pathnames inside a literal directory.
 def _rlistdir(dirname):
     if not dirname:
-        if isinstance(dirname, binary_type):
-            dirname = binary_type(os.curdir, 'ASCII')
+        if isinstance(dirname, bytes):
+            dirname = os.curdir.encode('ASCII')
         else:
             dirname = os.curdir
     try:
@@ -149,7 +147,7 @@ magic_check_bytes = re.compile(b'([*?[])')
 
 
 def has_magic(s):
-    if isinstance(s, binary_type):
+    if isinstance(s, bytes):
         match = magic_check_bytes.search(s)
     else:
         match = magic_check.search(s)
@@ -157,7 +155,7 @@ def has_magic(s):
 
 
 def _isrecursive(pattern):
-    if isinstance(pattern, binary_type):
+    if isinstance(pattern, bytes):
         return pattern == b'**'
     else:
         return pattern == '**'
@@ -169,7 +167,7 @@ def escape(pathname):
     # Escaping is done by wrapping any of "*?[" between square brackets.
     # Metacharacters do not work in the drive part and shouldn't be escaped.
     drive, pathname = os.path.splitdrive(pathname)
-    if isinstance(pathname, binary_type):
+    if isinstance(pathname, bytes):
         pathname = magic_check_bytes.sub(br'[\1]', pathname)
     else:
         pathname = magic_check.sub(r'[\1]', pathname)
-- 
cgit v1.2.1

From 818680c71d2a407f76ae9dc9edaee6c8b338ab2c Mon Sep 17 00:00:00 2001
From: Sviatoslav Sydorenko
Date: Thu, 31 Dec 2020 18:00:05 +0100
Subject: Simplify `setuptools.glob._iglob`
---
 setuptools/glob.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)
(limited to 'setuptools/glob.py')
diff --git a/setuptools/glob.py b/setuptools/glob.py
index 9d7cbc5d..87062b81 100644
--- a/setuptools/glob.py
+++ b/setuptools/glob.py
@@ -47,6 +47,8 @@ def iglob(pathname, recursive=False):
 
 def _iglob(pathname, recursive):
     dirname, basename = os.path.split(pathname)
+    glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1
+
     if not has_magic(pathname):
         if basename:
             if os.path.lexists(pathname):
@@ -56,13 +58,9 @@ def _iglob(pathname, recursive):
             if os.path.isdir(dirname):
                 yield pathname
         return
+
     if not dirname:
-        if recursive and _isrecursive(basename):
-            for x in glob2(dirname, basename):
-                yield x
-        else:
-            for x in glob1(dirname, basename):
-                yield x
+        yield from glob_in_dir(dirname, basename)
         return
     # `os.path.split()` returns the argument itself as a dirname if it is a
     # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
@@ -71,12 +69,7 @@ def _iglob(pathname, recursive):
         dirs = _iglob(dirname, recursive)
     else:
         dirs = [dirname]
-    if has_magic(basename):
-        if recursive and _isrecursive(basename):
-            glob_in_dir = glob2
-        else:
-            glob_in_dir = glob1
-    else:
+    if not has_magic(basename):
         glob_in_dir = glob0
     for dirname in dirs:
         for name in glob_in_dir(dirname, basename):
-- 
cgit v1.2.1