summaryrefslogtreecommitdiff
path: root/docker/utils/build.py
blob: d4223e749ff6c5f873602281f8c90d2e2e1c7c20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os

from ..constants import IS_WINDOWS_PLATFORM
from .fnmatch import fnmatch
from .utils import create_archive


def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
    """
    Archive the directory at ``path``, leaving out ignored entries.

    ``path`` is made absolute and used as the archive root. The entries to
    archive are selected by exclude_paths using the ``exclude`` patterns (a
    falsy value means no patterns) and the ``dockerfile`` name, and are
    handed to create_archive in sorted order together with ``fileobj`` and
    ``gzip``.

    Returns the result of create_archive.
    """
    context_root = os.path.abspath(path)
    ignore_patterns = exclude if exclude else []

    archive_members = exclude_paths(
        context_root, ignore_patterns, dockerfile=dockerfile
    )
    return create_archive(
        files=sorted(archive_members),
        root=context_root,
        fileobj=fileobj,
        gzip=gzip
    )


def exclude_paths(root, patterns, dockerfile=None):
    """
    Compute the paths under ``root`` that survive a list of .dockerignore
    ``patterns``.

    Patterns starting with ``!`` are exceptions (re-inclusions); all other
    patterns are exclusions. The Dockerfile name and ``.dockerignore`` are
    always appended to the inclusion patterns. ``dockerfile`` falls back to
    ``'Dockerfile'`` when it is None.

    Returns a set of paths (both regular files and directories), each one
    relative to ``root``.
    """
    dockerfile = 'Dockerfile' if dockerfile is None else dockerfile

    # Leading slashes are dropped before the patterns are classified.
    stripped = [entry.lstrip('/') for entry in patterns]
    negated = [entry for entry in stripped if entry.startswith('!')]

    # Inclusion patterns are the exceptions without their '!' marker.
    keep_patterns = [entry[1:] for entry in negated]
    keep_patterns.extend([dockerfile, '.dockerignore'])

    drop_patterns = list(set(stripped) - set(negated))

    selected = set(
        get_paths(root, drop_patterns, keep_patterns,
                  has_exceptions=bool(negated))
    )

    # If the Dockerfile lives in a subdirectory that is excluded, get_paths
    # never descends into it and the file would be missed. Add it explicitly
    # whenever it exists on disk.
    if os.path.exists(os.path.join(root, dockerfile)):
        selected.add(dockerfile.replace('/', os.path.sep))
    return selected


def should_include(path, exclude_patterns, include_patterns):
    """
    Decide whether ``path`` is kept, given exclusion and inclusion patterns.

    * No exclusion pattern matches: returns True.
    * An exclusion pattern matches and an inclusion pattern matches as well:
      returns True.
    * An exclusion pattern matches and no inclusion pattern does: returns
      False.
    """
    is_excluded = any(
        match_path(path, candidate) for candidate in exclude_patterns
    )
    if not is_excluded:
        return True

    # Excluded paths are only kept when an inclusion pattern rescues them.
    return any(
        match_path(path, candidate) for candidate in include_patterns
    )


def should_check_directory(directory_path, exclude_patterns, include_patterns):
    """
    Decide whether a directory has to be traversed.

    Returns True when should_include accepts ``directory_path``, or when the
    directory path is a prefix of at least one inclusion pattern. Returns
    False otherwise.
    """

    # To account for exception rules, check directories if their path is
    # a prefix to an inclusion pattern. This logic conforms with the current
    # docker logic (2016-10-27):
    # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671

    # Both sides are compared with '/' separators and a trailing '/', so a
    # directory only counts as a prefix on whole path components.
    dir_prefix = directory_path.replace(os.path.sep, '/') + '/'
    leads_to_exception = any(
        (candidate.replace(os.path.sep, '/') + '/').startswith(dir_prefix)
        for candidate in include_patterns
    )

    if should_include(directory_path, exclude_patterns, include_patterns):
        return True
    return leads_to_exception


def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
    """
    Walk ``root`` and collect every directory and file accepted by
    should_include.

    Directories rejected by should_check_directory are pruned from the walk,
    so nothing below them is visited. Directory symlinks are not followed.
    ``has_exceptions`` is accepted but not used by this function.

    Returns a list of paths relative to ``root``; within each visited
    directory the sub-directories come before the files.
    """
    collected = []

    walker = os.walk(root, topdown=True, followlinks=False)
    for current, subdirs, filenames in walker:
        rel_parent = os.path.relpath(current, root)
        if rel_parent == '.':
            rel_parent = ''

        # Prune the walk by assigning into the very list os.walk handed us.
        # With topdown=True this in-place mutation is the supported way to
        # skip traversal. See https://docs.python.org/2/library/os.html#os.walk
        subdirs[:] = [
            name for name in subdirs
            if should_check_directory(
                os.path.join(rel_parent, name),
                exclude_patterns, include_patterns
            )
        ]

        # A directory may be traversed (to reach an exception below it)
        # without being included itself, so each entry is tested again.
        for name in subdirs + filenames:
            candidate = os.path.join(rel_parent, name)
            if should_include(candidate, exclude_patterns, include_patterns):
                collected.append(candidate)

    return collected


def match_path(path, pattern):
    """
    Test ``path`` against a single ignore ``pattern``.

    Trailing '/' and os.path.sep characters are stripped from the pattern,
    and a non-empty pattern is then normalised with os.path.relpath. Unless
    the pattern contains '**', only as many leading components of ``path``
    as the pattern has are compared, so a pattern that matches a directory
    also matches the paths beneath it.

    Returns the result of fnmatch on the '/'-joined components.
    """
    trimmed = pattern.rstrip('/' + os.path.sep)
    normalized = os.path.relpath(trimmed) if trimmed else trimmed

    pattern_parts = normalized.split(os.path.sep)
    # On Windows, a pattern that did not split on os.path.sep is split on
    # '/' instead.
    if len(pattern_parts) == 1 and IS_WINDOWS_PLATFORM:
        pattern_parts = normalized.split('/')

    path_parts = path.split(os.path.sep)
    if '**' not in normalized:
        # Compare only the prefix of the path that the pattern can cover.
        path_parts = path_parts[:len(pattern_parts)]

    return fnmatch('/'.join(path_parts), '/'.join(pattern_parts))