diff options
author | Joffrey F <f.joffrey@gmail.com> | 2018-02-22 13:35:41 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-22 13:35:41 -0800 |
commit | cc6e1b12495be792a81bff5d0ccefa01007260b6 (patch) | |
tree | 2cffc7c2060505f97296f77ce4f3245003436af2 | |
parent | ad5f49b690b2e2838ca124291db500d25a3d6cf1 (diff) | |
parent | 0c948c7df65b0d7378c3c0c8d966c38171f1ef21 (diff) | |
download | docker-py-cc6e1b12495be792a81bff5d0ccefa01007260b6.tar.gz |
Merge pull request #1914 from mefyl/master
Improve .dockerignore compliance
-rw-r--r-- | docker/utils/build.py | 207 | ||||
-rw-r--r-- | tests/unit/utils_test.py | 92 |
2 files changed, 111 insertions, 188 deletions
diff --git a/docker/utils/build.py b/docker/utils/build.py index a218873..1da56fb 100644 --- a/docker/utils/build.py +++ b/docker/utils/build.py @@ -1,20 +1,24 @@ import os +import re from ..constants import IS_WINDOWS_PLATFORM -from .fnmatch import fnmatch +from fnmatch import fnmatch +from itertools import chain from .utils import create_archive def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False): root = os.path.abspath(path) exclude = exclude or [] - return create_archive( files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)), root=root, fileobj=fileobj, gzip=gzip ) +_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/') + + def exclude_paths(root, patterns, dockerfile=None): """ Given a root directory path and a list of .dockerignore patterns, return @@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None): All paths returned are relative to the root. """ + if dockerfile is None: dockerfile = 'Dockerfile' - patterns = [p.lstrip('/') for p in patterns] - exceptions = [p for p in patterns if p.startswith('!')] - - include_patterns = [p[1:] for p in exceptions] - include_patterns += [dockerfile, '.dockerignore'] - - exclude_patterns = list(set(patterns) - set(exceptions)) - - paths = get_paths(root, exclude_patterns, include_patterns, - has_exceptions=len(exceptions) > 0) - - return set(paths).union( - # If the Dockerfile is in a subdirectory that is excluded, get_paths - # will not descend into it and the file will be skipped. This ensures - # it doesn't happen. - set([dockerfile.replace('/', os.path.sep)]) - if os.path.exists(os.path.join(root, dockerfile)) else set() - ) - - -def should_include(path, exclude_patterns, include_patterns, root): - """ - Given a path, a list of exclude patterns, and a list of inclusion patterns: - - 1. Returns True if the path doesn't match any exclusion pattern - 2. Returns False if the path matches an exclusion pattern and doesn't match - an inclusion pattern - 3. Returns true if the path matches an exclusion pattern and matches an - inclusion pattern - """ - for pattern in exclude_patterns: - if match_path(path, pattern): - for pattern in include_patterns: - if match_path(path, pattern): - return True - if os.path.isabs(pattern) and match_path( - os.path.join(root, path), pattern): - return True - return False - return True - - -def should_check_directory(directory_path, exclude_patterns, include_patterns, - root): + def normalize(p): + # Leading and trailing slashes are not relevant. Yes, + # "foo.py/" must exclude the "foo.py" regular file. "." + # components are not relevant either, even if the whole + # pattern is only ".", as the Docker reference states: "For + # historical reasons, the pattern . is ignored." + split = [pt for pt in re.split(_SEP, p) if pt and pt != '.'] + # ".." component must be cleared with the potential previous + # component, regardless of whether it exists: "A preprocessing + # step [...] eliminates . and .. elements using Go's + # filepath.". + i = 0 + while i < len(split): + if split[i] == '..': + del split[i] + if i > 0: + del split[i - 1] + i -= 1 + else: + i += 1 + return split + + patterns = ( + (True, normalize(p[1:])) + if p.startswith('!') else + (False, normalize(p)) + for p in patterns) + patterns = list(reversed(list(chain( + # Exclude empty patterns such as "." or the empty string. + filter(lambda p: p[1], patterns), + # Always include the Dockerfile and .dockerignore + [(True, dockerfile.split('/')), (True, ['.dockerignore'])])))) + return set(walk(root, patterns)) + + +def walk(root, patterns, default=True): """ - Given a directory path, a list of exclude patterns, and a list of inclusion - patterns: - - 1. Returns True if the directory path should be included according to - should_include. - 2. Returns True if the directory path is the prefix for an inclusion - pattern - 3. Returns False otherwise + A collection of file lying below root that should be included according to + patterns. """ - # To account for exception rules, check directories if their path is a - # a prefix to an inclusion pattern. This logic conforms with the current - # docker logic (2016-10-27): - # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671 - - def normalize_path(path): - return path.replace(os.path.sep, '/') - - path_with_slash = normalize_path(directory_path) + '/' - possible_child_patterns = [ - pattern for pattern in map(normalize_path, include_patterns) - if (pattern + '/').startswith(path_with_slash) - ] - directory_included = should_include( - directory_path, exclude_patterns, include_patterns, root - ) - return directory_included or len(possible_child_patterns) > 0 - - -def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False): - paths = [] - - for parent, dirs, files in os.walk(root, topdown=True, followlinks=False): - parent = os.path.relpath(parent, root) - if parent == '.': - parent = '' - - # Remove excluded patterns from the list of directories to traverse - # by mutating the dirs we're iterating over. - # This looks strange, but is considered the correct way to skip - # traversal. See https://docs.python.org/2/library/os.html#os.walk - dirs[:] = [ - d for d in dirs if should_check_directory( - os.path.join(parent, d), exclude_patterns, include_patterns, - root - ) - ] - - for path in dirs: - if should_include(os.path.join(parent, path), - exclude_patterns, include_patterns, root): - paths.append(os.path.join(parent, path)) - - for path in files: - if should_include(os.path.join(parent, path), - exclude_patterns, include_patterns, root): - paths.append(os.path.join(parent, path)) - - return paths - - -def match_path(path, pattern): - - pattern = pattern.rstrip('/' + os.path.sep) - if pattern and not os.path.isabs(pattern): - pattern = os.path.relpath(pattern) - - pattern_components = pattern.split(os.path.sep) - if len(pattern_components) == 1 and IS_WINDOWS_PLATFORM: - pattern_components = pattern.split('/') - - if '**' not in pattern: - path_components = path.split(os.path.sep)[:len(pattern_components)] - else: - path_components = path.split(os.path.sep) - return fnmatch('/'.join(path_components), '/'.join(pattern_components)) + def match(p): + if p[1][0] == '**': + rec = (p[0], p[1][1:]) + return [p] + (match(rec) if rec[1] else [rec]) + elif fnmatch(f, p[1][0]): + return [(p[0], p[1][1:])] + else: + return [] + + for f in os.listdir(root): + cur = os.path.join(root, f) + # The patterns if recursing in that directory. + sub = list(chain(*(match(p) for p in patterns))) + # Whether this file is explicitely included / excluded. + hit = next((p[0] for p in sub if not p[1]), None) + # Whether this file is implicitely included / excluded. + matched = default if hit is None else hit + sub = list(filter(lambda p: p[1], sub)) + if os.path.isdir(cur): + # Entirely skip directories if there are no chance any subfile will + # be included. + if all(not p[0] for p in sub) and not matched: + continue + # I think this would greatly speed up dockerignore handling by not + # recursing into directories we are sure would be entirely + # included, and only yielding the directory itself, which will be + # recursively archived anyway. However the current unit test expect + # the full list of subfiles and I'm not 100% sure it would make no + # difference yet. + # if all(p[0] for p in sub) and matched: + # yield f + # continue + children = False + for r in (os.path.join(f, p) for p in walk(cur, sub, matched)): + yield r + children = True + # The current unit tests expect directories only under those + # conditions. It might be simplifiable though. + if (not sub or not children) and hit or hit is None and default: + yield f + elif matched: + yield f diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index e144b7b..8a4b193 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -23,7 +23,6 @@ from docker.utils import ( decode_json_header, tar, split_command, parse_devices, update_headers, ) -from docker.utils.build import should_check_directory from docker.utils.ports import build_port_bindings, split_port from docker.utils.utils import format_environment @@ -758,6 +757,13 @@ class ExcludePathsTest(unittest.TestCase): self.all_paths - set(['foo/a.py']) ) + def test_exclude_include_absolute_path(self): + base = make_tree([], ['a.py', 'b.py']) + assert exclude_paths( + base, + ['/*', '!/*.py'] + ) == set(['a.py', 'b.py']) + def test_single_subdir_with_path_traversal(self): assert self.exclude(['foo/whoops/../a.py']) == convert_paths( self.all_paths - set(['foo/a.py']) @@ -876,12 +882,25 @@ class ExcludePathsTest(unittest.TestCase): ) ) - def test_exclude_include_absolute_path(self): - base = make_tree([], ['a.py', 'b.py']) + def test_include_wildcard(self): + base = make_tree(['a'], ['a/b.py']) assert exclude_paths( base, - ['/*', '!' + os.path.join(base, '*.py')] - ) == set(['a.py', 'b.py']) + ['*', '!*/b.py'] + ) == convert_paths(['a/b.py']) + + def test_last_line_precedence(self): + base = make_tree( + [], + ['garbage.md', + 'thrash.md', + 'README.md', + 'README-bis.md', + 'README-secret.md']) + assert exclude_paths( + base, + ['*.md', '!README*.md', 'README-secret.md'] + ) == set(['README.md', 'README-bis.md']) class TarTest(unittest.TestCase): @@ -1019,69 +1038,6 @@ class TarTest(unittest.TestCase): assert tar_data.getmember('th.txt').mtime == -3600 -class ShouldCheckDirectoryTest(unittest.TestCase): - exclude_patterns = [ - 'exclude_rather_large_directory', - 'dir/with/subdir_excluded', - 'dir/with/exceptions' - ] - - include_patterns = [ - 'dir/with/exceptions/like_this_one', - 'dir/with/exceptions/in/descendents' - ] - - def test_should_check_directory_not_excluded(self): - assert should_check_directory( - 'not_excluded', self.exclude_patterns, self.include_patterns, '.' - ) - assert should_check_directory( - convert_path('dir/with'), self.exclude_patterns, - self.include_patterns, '.' - ) - - def test_shoud_check_parent_directories_of_excluded(self): - assert should_check_directory( - 'dir', self.exclude_patterns, self.include_patterns, '.' - ) - assert should_check_directory( - convert_path('dir/with'), self.exclude_patterns, - self.include_patterns, '.' - ) - - def test_should_not_check_excluded_directories_with_no_exceptions(self): - assert not should_check_directory( - 'exclude_rather_large_directory', self.exclude_patterns, - self.include_patterns, '.' - ) - assert not should_check_directory( - convert_path('dir/with/subdir_excluded'), self.exclude_patterns, - self.include_patterns, '.' - ) - - def test_should_check_excluded_directory_with_exceptions(self): - assert should_check_directory( - convert_path('dir/with/exceptions'), self.exclude_patterns, - self.include_patterns, '.' - ) - assert should_check_directory( - convert_path('dir/with/exceptions/in'), self.exclude_patterns, - self.include_patterns, '.' - ) - - def test_should_not_check_siblings_of_exceptions(self): - assert not should_check_directory( - convert_path('dir/with/exceptions/but_not_here'), - self.exclude_patterns, self.include_patterns, '.' - ) - - def test_should_check_subdirectories_of_exceptions(self): - assert should_check_directory( - convert_path('dir/with/exceptions/like_this_one/subdir'), - self.exclude_patterns, self.include_patterns, '.' - ) - - class FormatEnvironmentTest(unittest.TestCase): def test_format_env_binary_unicode_value(self): env_dict = { |