import io import os import re import six import tarfile import tempfile from ..constants import IS_WINDOWS_PLATFORM from fnmatch import fnmatch from itertools import chain _SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/') def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False): root = os.path.abspath(path) exclude = exclude or [] dockerfile = dockerfile or (None, None) extra_files = [] if dockerfile[1] is not None: dockerignore_contents = '\n'.join( (exclude or ['.dockerignore']) + [dockerfile[0]] ) extra_files = [ ('.dockerignore', dockerignore_contents), dockerfile, ] return create_archive( files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile[0])), root=root, fileobj=fileobj, gzip=gzip, extra_files=extra_files ) def exclude_paths(root, patterns, dockerfile=None): """ Given a root directory path and a list of .dockerignore patterns, return an iterator of all paths (both regular files and directories) in the root directory that do *not* match any of the patterns. All paths returned are relative to the root. """ if dockerfile is None: dockerfile = 'Dockerfile' def split_path(p): return [pt for pt in re.split(_SEP, p) if pt and pt != '.'] def normalize(p): # Leading and trailing slashes are not relevant. Yes, # "foo.py/" must exclude the "foo.py" regular file. "." # components are not relevant either, even if the whole # pattern is only ".", as the Docker reference states: "For # historical reasons, the pattern . is ignored." # ".." component must be cleared with the potential previous # component, regardless of whether it exists: "A preprocessing # step [...] eliminates . and .. elements using Go's # filepath.". i = 0 split = split_path(p) while i < len(split): if split[i] == '..': del split[i] if i > 0: del split[i - 1] i -= 1 else: i += 1 return split patterns = ( (True, normalize(p[1:])) if p.startswith('!') else (False, normalize(p)) for p in patterns) patterns = list(reversed(list(chain( # Exclude empty patterns such as "." or the empty string. filter(lambda p: p[1], patterns), # Always include the Dockerfile and .dockerignore [(True, split_path(dockerfile)), (True, ['.dockerignore'])])))) return set(walk(root, patterns)) def walk(root, patterns, default=True): """ A collection of file lying below root that should be included according to patterns. """ def match(p): if p[1][0] == '**': rec = (p[0], p[1][1:]) return [p] + (match(rec) if rec[1] else [rec]) elif fnmatch(f, p[1][0]): return [(p[0], p[1][1:])] else: return [] for f in os.listdir(root): cur = os.path.join(root, f) # The patterns if recursing in that directory. sub = list(chain(*(match(p) for p in patterns))) # Whether this file is explicitely included / excluded. hit = next((p[0] for p in sub if not p[1]), None) # Whether this file is implicitely included / excluded. matched = default if hit is None else hit sub = list(filter(lambda p: p[1], sub)) if os.path.isdir(cur) and not os.path.islink(cur): # Entirely skip directories if there are no chance any subfile will # be included. if all(not p[0] for p in sub) and not matched: continue # I think this would greatly speed up dockerignore handling by not # recursing into directories we are sure would be entirely # included, and only yielding the directory itself, which will be # recursively archived anyway. However the current unit test expect # the full list of subfiles and I'm not 100% sure it would make no # difference yet. # if all(p[0] for p in sub) and matched: # yield f # continue children = False for r in (os.path.join(f, p) for p in walk(cur, sub, matched)): yield r children = True # The current unit tests expect directories only under those # conditions. It might be simplifiable though. if (not sub or not children) and hit or hit is None and default: yield f elif matched: yield f def build_file_list(root): files = [] for dirname, dirnames, fnames in os.walk(root): for filename in fnames + dirnames: longpath = os.path.join(dirname, filename) files.append( longpath.replace(root, '', 1).lstrip('/') ) return files def create_archive(root, files=None, fileobj=None, gzip=False, extra_files=None): extra_files = extra_files or [] if not fileobj: fileobj = tempfile.NamedTemporaryFile() t = tarfile.open(mode='w:gz' if gzip else 'w', fileobj=fileobj) if files is None: files = build_file_list(root) extra_names = set(e[0] for e in extra_files) for path in files: if path in extra_names: # Extra files override context files with the same name continue full_path = os.path.join(root, path) i = t.gettarinfo(full_path, arcname=path) if i is None: # This happens when we encounter a socket file. We can safely # ignore it and proceed. continue # Workaround https://bugs.python.org/issue32713 if i.mtime < 0 or i.mtime > 8**11 - 1: i.mtime = int(i.mtime) if IS_WINDOWS_PLATFORM: # Windows doesn't keep track of the execute bit, so we make files # and directories executable by default. i.mode = i.mode & 0o755 | 0o111 if i.isfile(): try: with open(full_path, 'rb') as f: t.addfile(i, f) except IOError: raise IOError( 'Can not read file in context: {}'.format(full_path) ) else: # Directories, FIFOs, symlinks... don't need to be read. t.addfile(i, None) for name, contents in extra_files: info = tarfile.TarInfo(name) info.size = len(contents) t.addfile(info, io.BytesIO(contents.encode('utf-8'))) t.close() fileobj.seek(0) return fileobj def mkbuildcontext(dockerfile): f = tempfile.NamedTemporaryFile() t = tarfile.open(mode='w', fileobj=f) if isinstance(dockerfile, io.StringIO): dfinfo = tarfile.TarInfo('Dockerfile') if six.PY3: raise TypeError('Please use io.BytesIO to create in-memory ' 'Dockerfiles with Python 3') else: dfinfo.size = len(dockerfile.getvalue()) dockerfile.seek(0) elif isinstance(dockerfile, io.BytesIO): dfinfo = tarfile.TarInfo('Dockerfile') dfinfo.size = len(dockerfile.getvalue()) dockerfile.seek(0) else: dfinfo = t.gettarinfo(fileobj=dockerfile, arcname='Dockerfile') t.addfile(dfinfo, dockerfile) t.close() f.seek(0) return f