From 5af89904c9ad33b632305b53058ddc69190f56a9 Mon Sep 17 00:00:00 2001 From: Joffrey F Date: Thu, 21 Jun 2018 00:15:55 -0700 Subject: Improved .dockerignore pattern processing to better match Docker CLI behavior Signed-off-by: Joffrey F --- docker/utils/build.py | 199 ++++++++++++++++++++++++++--------------------- docker/utils/fnmatch.py | 1 + tests/unit/utils_test.py | 12 ++- 3 files changed, 123 insertions(+), 89 deletions(-) diff --git a/docker/utils/build.py b/docker/utils/build.py index b644c9f..9ce0095 100644 --- a/docker/utils/build.py +++ b/docker/utils/build.py @@ -6,8 +6,7 @@ import tarfile import tempfile from ..constants import IS_WINDOWS_PLATFORM -from fnmatch import fnmatch -from itertools import chain +from .fnmatch import fnmatch _SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/') @@ -44,92 +43,9 @@ def exclude_paths(root, patterns, dockerfile=None): if dockerfile is None: dockerfile = 'Dockerfile' - def split_path(p): - return [pt for pt in re.split(_SEP, p) if pt and pt != '.'] - - def normalize(p): - # Leading and trailing slashes are not relevant. Yes, - # "foo.py/" must exclude the "foo.py" regular file. "." - # components are not relevant either, even if the whole - # pattern is only ".", as the Docker reference states: "For - # historical reasons, the pattern . is ignored." - # ".." component must be cleared with the potential previous - # component, regardless of whether it exists: "A preprocessing - # step [...] eliminates . and .. elements using Go's - # filepath.". - i = 0 - split = split_path(p) - while i < len(split): - if split[i] == '..': - del split[i] - if i > 0: - del split[i - 1] - i -= 1 - else: - i += 1 - return split - - patterns = ( - (True, normalize(p[1:])) - if p.startswith('!') else - (False, normalize(p)) - for p in patterns) - patterns = list(reversed(list(chain( - # Exclude empty patterns such as "." or the empty string. - filter(lambda p: p[1], patterns), - # Always include the Dockerfile and .dockerignore - [(True, split_path(dockerfile)), (True, ['.dockerignore'])])))) - return set(walk(root, patterns)) - - -def walk(root, patterns, default=True): - """ - A collection of file lying below root that should be included according to - patterns. - """ - - def match(p): - if p[1][0] == '**': - rec = (p[0], p[1][1:]) - return [p] + (match(rec) if rec[1] else [rec]) - elif fnmatch(f, p[1][0]): - return [(p[0], p[1][1:])] - else: - return [] - - for f in os.listdir(root): - cur = os.path.join(root, f) - # The patterns if recursing in that directory. - sub = list(chain(*(match(p) for p in patterns))) - # Whether this file is explicitely included / excluded. - hit = next((p[0] for p in sub if not p[1]), None) - # Whether this file is implicitely included / excluded. - matched = default if hit is None else hit - sub = list(filter(lambda p: p[1], sub)) - if os.path.isdir(cur) and not os.path.islink(cur): - # Entirely skip directories if there are no chance any subfile will - # be included. - if all(not p[0] for p in sub) and not matched: - continue - # I think this would greatly speed up dockerignore handling by not - # recursing into directories we are sure would be entirely - # included, and only yielding the directory itself, which will be - # recursively archived anyway. However the current unit test expect - # the full list of subfiles and I'm not 100% sure it would make no - # difference yet. - # if all(p[0] for p in sub) and matched: - # yield f - # continue - children = False - for r in (os.path.join(f, p) for p in walk(cur, sub, matched)): - yield r - children = True - # The current unit tests expect directories only under those - # conditions. It might be simplifiable though. - if (not sub or not children) and hit or hit is None and default: - yield f - elif matched: - yield f + patterns.append('!' + dockerfile) + pm = PatternMatcher(patterns) + return set(pm.walk(root)) def build_file_list(root): @@ -217,3 +133,110 @@ def mkbuildcontext(dockerfile): t.close() f.seek(0) return f + + +def split_path(p): + return [pt for pt in re.split(_SEP, p) if pt and pt != '.'] + + +# Heavily based on +# https://github.com/moby/moby/blob/master/pkg/fileutils/fileutils.go +class PatternMatcher(object): + def __init__(self, patterns): + self.patterns = list(filter( + lambda p: p.dirs, [Pattern(p) for p in patterns] + )) + self.patterns.append(Pattern('!.dockerignore')) + + def matches(self, filepath): + matched = False + parent_path = os.path.dirname(filepath) + parent_path_dirs = split_path(parent_path) + + for pattern in self.patterns: + negative = pattern.exclusion + match = pattern.match(filepath) + if not match and parent_path != '': + if len(pattern.dirs) <= len(parent_path_dirs): + match = pattern.match( + os.path.sep.join(parent_path_dirs[:len(pattern.dirs)]) + ) + + if match: + matched = not negative + + return matched + + def walk(self, root): + def rec_walk(current_dir): + for f in os.listdir(current_dir): + fpath = os.path.join( + os.path.relpath(current_dir, root), f + ) + if fpath.startswith('.' + os.path.sep): + fpath = fpath[2:] + match = self.matches(fpath) + if not match: + yield fpath + + cur = os.path.join(root, fpath) + if not os.path.isdir(cur) or os.path.islink(cur): + continue + + if match: + # If we want to skip this file and its a directory + # then we should first check to see if there's an + # excludes pattern (e.g. !dir/file) that starts with this + # dir. If so then we can't skip this dir. + skip = True + + for pat in self.patterns: + if not pat.exclusion: + continue + if pat.cleaned_pattern.startswith(fpath): + skip = False + break + if skip: + continue + for sub in rec_walk(cur): + yield sub + + return rec_walk(root) + + +class Pattern(object): + def __init__(self, pattern_str): + self.exclusion = False + if pattern_str.startswith('!'): + self.exclusion = True + pattern_str = pattern_str[1:] + + self.dirs = self.normalize(pattern_str) + self.cleaned_pattern = '/'.join(self.dirs) + + @classmethod + def normalize(cls, p): + + # Leading and trailing slashes are not relevant. Yes, + # "foo.py/" must exclude the "foo.py" regular file. "." + # components are not relevant either, even if the whole + # pattern is only ".", as the Docker reference states: "For + # historical reasons, the pattern . is ignored." + # ".." component must be cleared with the potential previous + # component, regardless of whether it exists: "A preprocessing + # step [...] eliminates . and .. elements using Go's + # filepath.". + i = 0 + split = split_path(p) + while i < len(split): + if split[i] == '..': + del split[i] + if i > 0: + del split[i - 1] + i -= 1 + else: + i += 1 + return split + + def match(self, filepath): + return fnmatch(filepath, self.cleaned_pattern) diff --git a/docker/utils/fnmatch.py b/docker/utils/fnmatch.py index 42461dd..cc940a2 100644 --- a/docker/utils/fnmatch.py +++ b/docker/utils/fnmatch.py @@ -111,4 +111,5 @@ def translate(pat): res = '%s[%s]' % (res, stuff) else: res = res + re.escape(c) + return res + '$' diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index 00456e8..467e835 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -887,12 +887,22 @@ class ExcludePathsTest(unittest.TestCase): ) ) + def test_double_wildcard_with_exception(self): + assert self.exclude(['**', '!bar', '!foo/bar']) == convert_paths( + set([ + 'foo/bar', 'foo/bar/a.py', 'bar', 'bar/a.py', 'Dockerfile', + '.dockerignore', + ]) + ) + def test_include_wildcard(self): + # This may be surprising but it matches the CLI's behavior + # (tested with 18.05.0-ce on linux) base = make_tree(['a'], ['a/b.py']) assert exclude_paths( base, ['*', '!*/b.py'] - ) == convert_paths(['a/b.py']) + ) == set() def test_last_line_precedence(self): base = make_tree( -- cgit v1.2.1