diff options
author | Valentin David <valentin.david@codethink.co.uk> | 2019-02-12 18:38:55 +0100 |
---|---|---|
committer | Valentin David <valentin.david@codethink.co.uk> | 2019-07-11 20:18:36 +0200 |
commit | 3c26f0c88ff2b0477a0d28457cf15a65cb753171 (patch) | |
tree | d82fb3bdec12a2a00321010cd10dedd5bee054b7 | |
parent | 24426ebe31fc2ad297b352a1d332c7cf158ef5c2 (diff) | |
download | buildstream-valentindavid/local-cache-exec-leak.tar.gz |
Deduplicate files in local cache with or without exec rights (branch: valentindavid/local-cache-exec-leak)
If we introduce the exact same object with execution rights as an existing
file without execution rights, we should not expect that the file
suddenly gains execution rights. This breaks reproducibility, and it is
easy to encounter: for example, install an empty file with execution
rights, or copy a file from another artifact and `chmod +x` it.
-rw-r--r-- | src/buildstream/_cas/cascache.py | 109 | ||||
-rw-r--r-- | src/buildstream/_cas/casremote.py | 9 | ||||
-rw-r--r-- | src/buildstream/_cas/casserver.py | 2 | ||||
-rw-r--r-- | src/buildstream/storage/_casbaseddirectory.py | 2 | ||||
-rw-r--r-- | src/buildstream/storage/_filebaseddirectory.py | 5 | ||||
-rw-r--r-- | tests/frontend/buildcheckout.py | 33 | ||||
-rw-r--r-- | tests/integration/shell.py | 2 | ||||
-rw-r--r-- | tests/testutils/artifactshare.py | 2 |
8 files changed, 110 insertions, 54 deletions
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py index 49a5e8837..1008eeff6 100644 --- a/src/buildstream/_cas/cascache.py +++ b/src/buildstream/_cas/cascache.py @@ -81,9 +81,10 @@ class CASCacheUsage(): # class CASCache(): - def __init__(self, path): + def __init__(self, path, *, disable_exec=False): self.casdir = os.path.join(path, 'cas') self.tmpdir = os.path.join(path, 'tmp') + self._disable_exec = disable_exec os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True) os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True) os.makedirs(self.tmpdir, exist_ok=True) @@ -136,7 +137,7 @@ class CASCache(): # Optionally check presence of files if with_files: for filenode in directory.files: - if not os.path.exists(self.objpath(filenode.digest)): + if not os.path.exists(self.objpath(filenode.digest, is_exec=filenode.is_executable)): return False # Check subdirectories @@ -169,13 +170,9 @@ class CASCache(): # regular file, create hardlink fullpath = os.path.join(dest, filenode.name) if can_link: - utils.safe_link(self.objpath(filenode.digest), fullpath) + utils.safe_link(self.objpath(filenode.digest, is_exec=filenode.is_executable), fullpath) else: - utils.safe_copy(self.objpath(filenode.digest), fullpath) - - if filenode.is_executable: - os.chmod(fullpath, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | - stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) + utils.safe_copy(self.objpath(filenode.digest, is_exec=filenode.is_executable), fullpath) for dirnode in directory.directories: fullpath = os.path.join(dest, dirnode.name) @@ -332,8 +329,12 @@ class CASCache(): # Returns: # (str): The path of the object # - def objpath(self, digest): - return os.path.join(self.casdir, 'objects', digest.hash[:2], digest.hash[2:]) + def objpath(self, digest, *, is_exec=False): + if is_exec and not self._disable_exec: + filename = '{}.exec'.format(digest.hash[2:]) + else: + filename = digest.hash[2:] + return 
os.path.join(self.casdir, 'objects', digest.hash[:2], filename) # add_object(): # @@ -350,7 +351,7 @@ class CASCache(): # # Either `path` or `buffer` must be passed, but not both. # - def add_object(self, *, digest=None, path=None, buffer=None, link_directly=False): + def add_object(self, *, digest=None, path=None, buffer=None, link_directly=False, is_exec=False): # Exactly one of the two parameters has to be specified assert (path is None) != (buffer is None) @@ -369,8 +370,7 @@ class CASCache(): for chunk in iter(lambda: tmp.read(_BUFFER_SIZE), b""): h.update(chunk) else: - tmp = stack.enter_context(self._temporary_object()) - + tmp = stack.enter_context(self._temporary_object(is_exec=is_exec)) if path: with open(path, 'rb') as f: for chunk in iter(lambda: f.read(_BUFFER_SIZE), b""): @@ -386,7 +386,7 @@ class CASCache(): digest.size_bytes = os.fstat(tmp.fileno()).st_size # Place file at final location - objpath = self.objpath(digest) + objpath = self.objpath(digest, is_exec=is_exec) os.makedirs(os.path.dirname(objpath), exist_ok=True) os.link(tmp.name, objpath) @@ -592,19 +592,25 @@ class CASCache(): # def remote_missing_blobs(self, remote, blobs): missing_blobs = dict() + executable = {} + # Limit size of FindMissingBlobs request for required_blobs_group in _grouper(iter(blobs), 512): request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name) - for required_digest in required_blobs_group: + for is_exec, required_digest in required_blobs_group: d = request.blob_digests.add() d.CopyFrom(required_digest) + if required_digest.hash not in executable: + executable[required_digest.hash] = set() + executable[required_digest.hash].add(is_exec) response = remote.cas.FindMissingBlobs(request) for missing_digest in response.missing_blob_digests: d = remote_execution_pb2.Digest() d.CopyFrom(missing_digest) - missing_blobs[d.hash] = d + for is_exec in executable[missing_digest.hash]: + missing_blobs[(is_exec, d.hash)] = (is_exec, d) return 
missing_blobs.values() @@ -619,10 +625,10 @@ class CASCache(): # def local_missing_blobs(self, digests): missing_blobs = [] - for digest in digests: - objpath = self.objpath(digest) + for is_exec, digest in digests: + objpath = self.objpath(digest, is_exec=is_exec) if not os.path.exists(objpath): - missing_blobs.append(digest) + missing_blobs.append((is_exec, digest)) return missing_blobs # required_blobs_for_directory(): @@ -636,7 +642,7 @@ class CASCache(): # parse directory, and recursively add blobs - yield directory_digest + yield False, directory_digest directory = remote_execution_pb2.Directory() @@ -644,7 +650,7 @@ class CASCache(): directory.ParseFromString(f.read()) for filenode in directory.files: - yield filenode.digest + yield filenode.is_executable, filenode.digest for dirnode in directory.directories: if dirnode.name not in excluded_subdirs: @@ -797,9 +803,9 @@ class CASCache(): for filenode in directory.files: if update_mtime: - os.utime(self.objpath(filenode.digest)) + os.utime(self.objpath(filenode.digest, is_exec=filenode.is_executable)) if check_exists: - if not os.path.exists(self.objpath(filenode.digest)): + if not os.path.exists(self.objpath(filenode.digest, is_exec=filenode.is_executable)): raise FileNotFoundError reachable.add(filenode.digest.hash) @@ -813,10 +819,12 @@ class CASCache(): # # Create a named temporary file with 0o0644 access rights. 
@contextlib.contextmanager - def _temporary_object(self): + def _temporary_object(self, *, is_exec=False): with utils._tempnamedfile(dir=self.tmpdir) as f: - os.chmod(f.name, - stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH) + access = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH + if is_exec and not self._disable_exec: + access |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + os.chmod(f.name, access) yield f # _ensure_blob(): @@ -830,27 +838,27 @@ class CASCache(): # Returns: # (str): The path of the object # - def _ensure_blob(self, remote, digest): - objpath = self.objpath(digest) + def _ensure_blob(self, remote, digest, is_exec=False): + objpath = self.objpath(digest, is_exec=is_exec) if os.path.exists(objpath): # already in local repository return objpath - with self._temporary_object() as f: + with self._temporary_object(is_exec=is_exec) as f: remote._fetch_blob(digest, f) - added_digest = self.add_object(path=f.name, link_directly=True) + added_digest = self.add_object(path=f.name, link_directly=True, is_exec=is_exec) assert added_digest.hash == digest.hash return objpath def _batch_download_complete(self, batch, *, missing_blobs=None): - for digest, data in batch.send(missing_blobs=missing_blobs): - with self._temporary_object() as f: + for digest, data, is_exec in batch.send(missing_blobs=missing_blobs): + with self._temporary_object(is_exec=is_exec) as f: f.write(data) f.flush() - added_digest = self.add_object(path=f.name, link_directly=True) + added_digest = self.add_object(path=f.name, link_directly=True, is_exec=is_exec) assert added_digest.hash == digest.hash # Helper function for _fetch_directory(). @@ -864,8 +872,9 @@ class CASCache(): return _CASBatchRead(remote) # Helper function for _fetch_directory(). 
- def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue, *, recursive=False): - in_local_cache = os.path.exists(self.objpath(digest)) + def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue, + *, recursive=False, is_exec=False): + in_local_cache = os.path.exists(self.objpath(digest, is_exec=is_exec)) if in_local_cache: # Skip download, already in local cache. @@ -873,14 +882,14 @@ class CASCache(): elif (digest.size_bytes >= remote.max_batch_total_size_bytes or not remote.batch_read_supported): # Too large for batch request, download in independent request. - self._ensure_blob(remote, digest) + self._ensure_blob(remote, digest, is_exec=is_exec) in_local_cache = True else: - if not batch.add(digest): + if not batch.add(digest, is_exec=is_exec): # Not enough space left in batch request. # Complete pending batch first. batch = self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue) - batch.add(digest) + batch.add(digest, is_exec=is_exec) if recursive: if in_local_cache: @@ -963,25 +972,31 @@ class CASCache(): batch = _CASBatchRead(remote) - for digest in digests: + for d in digests: + try: + is_exec, digest = d + except TypeError: + digest = d + is_exec = False + if (digest.size_bytes >= remote.max_batch_total_size_bytes or not remote.batch_read_supported): # Too large for batch request, download in independent request. try: - self._ensure_blob(remote, digest) + self._ensure_blob(remote, digest, is_exec=is_exec) except grpc.RpcError as e: if e.code() == grpc.StatusCode.NOT_FOUND: missing_blobs.append(digest) else: raise CASCacheError("Failed to fetch blob: {}".format(e)) from e else: - if not batch.add(digest): + if not batch.add(digest, is_exec=is_exec): # Not enough space left in batch request. # Complete pending batch first. 
self._batch_download_complete(batch, missing_blobs=missing_blobs) batch = _CASBatchRead(remote) - batch.add(digest) + batch.add(digest, is_exec=is_exec) # Complete last pending batch self._batch_download_complete(batch, missing_blobs=missing_blobs) @@ -999,8 +1014,14 @@ class CASCache(): def send_blobs(self, remote, digests, u_uid=uuid.uuid4()): batch = _CASBatchUpdate(remote) - for digest in digests: - with open(self.objpath(digest), 'rb') as f: + for d in digests: + try: + is_exec, digest = d + except TypeError: + digest = d + is_exec = False + + with open(self.objpath(digest, is_exec=is_exec), 'rb') as f: assert os.fstat(f.fileno()).st_size == digest.size_bytes if (digest.size_bytes >= remote.max_batch_total_size_bytes or diff --git a/src/buildstream/_cas/casremote.py b/src/buildstream/_cas/casremote.py index cd46e9c38..b461d4eef 100644 --- a/src/buildstream/_cas/casremote.py +++ b/src/buildstream/_cas/casremote.py @@ -310,8 +310,9 @@ class _CASBatchRead(): self._request.instance_name = remote.instance_name self._size = 0 self._sent = False + self._is_exec = {} - def add(self, digest): + def add(self, digest, *, is_exec=False): assert not self._sent new_batch_size = self._size + digest.size_bytes @@ -323,6 +324,9 @@ class _CASBatchRead(): request_digest.hash = digest.hash request_digest.size_bytes = digest.size_bytes self._size = new_batch_size + if digest.hash not in self._is_exec: + self._is_exec[digest.hash] = set() + self._is_exec[digest.hash].add(is_exec) return True def send(self, *, missing_blobs=None): @@ -349,7 +353,8 @@ class _CASBatchRead(): raise CASRemoteError("Failed to download blob {}: expected {} bytes, received {} bytes".format( response.digest.hash, response.digest.size_bytes, len(response.data))) - yield (response.digest, response.data) + for is_exec in self._is_exec[response.digest.hash]: + yield (response.digest, response.data, is_exec) # Represents a batch of blobs queued for upload. 
diff --git a/src/buildstream/_cas/casserver.py b/src/buildstream/_cas/casserver.py index 9606c263b..582ac2ae6 100644 --- a/src/buildstream/_cas/casserver.py +++ b/src/buildstream/_cas/casserver.py @@ -64,7 +64,7 @@ class ArtifactTooLargeException(Exception): def create_server(repo, *, enable_push, max_head_size=int(10e9), min_head_size=int(2e9)): - cas = CASCache(os.path.abspath(repo)) + cas = CASCache(os.path.abspath(repo), disable_exec=True) artifactdir = os.path.join(os.path.abspath(repo), 'artifacts', 'refs') sourcedir = os.path.join(os.path.abspath(repo), 'source_protos') diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py index 2c5d751e4..b04075585 100644 --- a/src/buildstream/storage/_casbaseddirectory.py +++ b/src/buildstream/storage/_casbaseddirectory.py @@ -136,8 +136,8 @@ class CasBasedDirectory(Directory): entry = IndexEntry(filename, _FileType.REGULAR_FILE, modified=modified or filename in self.index) path = os.path.join(basename, filename) - entry.digest = self.cas_cache.add_object(path=path, link_directly=can_link) entry.is_executable = os.access(path, os.X_OK) + entry.digest = self.cas_cache.add_object(path=path, link_directly=can_link, is_exec=entry.is_executable) self.index[filename] = entry self.__invalidate_digest() diff --git a/src/buildstream/storage/_filebaseddirectory.py b/src/buildstream/storage/_filebaseddirectory.py index 8c55819c9..4f93737c0 100644 --- a/src/buildstream/storage/_filebaseddirectory.py +++ b/src/buildstream/storage/_filebaseddirectory.py @@ -272,11 +272,8 @@ class FileBasedDirectory(Directory): continue if entry.type == _FileType.REGULAR_FILE: - src_path = source_directory.cas_cache.objpath(entry.digest) + src_path = source_directory.cas_cache.objpath(entry.digest, is_exec=entry.is_executable) actionfunc(src_path, dest_path, result=result) - if entry.is_executable: - os.chmod(dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | - stat.S_IRGRP | stat.S_IXGRP | 
stat.S_IROTH | stat.S_IXOTH) else: assert entry.type == _FileType.SYMLINK os.symlink(entry.target, dest_path) diff --git a/tests/frontend/buildcheckout.py b/tests/frontend/buildcheckout.py index 97bce91a7..ccd284cdb 100644 --- a/tests/frontend/buildcheckout.py +++ b/tests/frontend/buildcheckout.py @@ -4,6 +4,8 @@ import os import tarfile import hashlib +import shutil +import stat import subprocess import re @@ -885,3 +887,34 @@ def test_partial_checkout_fail(tmpdir, datafiles, cli): checkout_dir]) res.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt') assert re.findall(r'Remote \((\S+)\) does not have artifact (\S+) cached', res.stderr) + + +@pytest.mark.datafiles(DATA_DIR) +def test_access_rights(datafiles, cli): + project = str(datafiles) + checkout = os.path.join(cli.directory, 'checkout') + + shutil.copyfile(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello'), + os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello-2')) + os.chmod(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello'), + 0o0755) + os.chmod(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello-2'), + 0o0644) + + result = cli.run(project=project, args=['build', 'target.bst']) + result.assert_success() + + checkout_args = ['artifact', 'checkout', 'target.bst', + '--directory', checkout] + + # Now check it out + result = cli.run(project=project, args=checkout_args) + result.assert_success() + + st = os.lstat(os.path.join(checkout, 'usr', 'bin', 'hello')) + assert stat.S_ISREG(st.st_mode) + assert stat.S_IMODE(st.st_mode) == 0o0755 + + st = os.lstat(os.path.join(checkout, 'usr', 'bin', 'hello-2')) + assert stat.S_ISREG(st.st_mode) + assert stat.S_IMODE(st.st_mode) == 0o0644 diff --git a/tests/integration/shell.py b/tests/integration/shell.py index f7de3e462..b55eb7b03 100644 --- a/tests/integration/shell.py +++ b/tests/integration/shell.py @@ -430,7 +430,7 @@ def test_integration_partial_artifact(cli, datafiles, tmpdir, 
integration_cache) # Remove the binary from the CAS cachedir = cli.config['cachedir'] - objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], digest[2:]) + objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], '{}.exec'.format(digest[2:])) os.unlink(objpath) # check shell doesn't work diff --git a/tests/testutils/artifactshare.py b/tests/testutils/artifactshare.py index a5522c8eb..965a4a924 100644 --- a/tests/testutils/artifactshare.py +++ b/tests/testutils/artifactshare.py @@ -46,7 +46,7 @@ class ArtifactShare(): self.artifactdir = os.path.join(self.repodir, 'artifacts', 'refs') os.makedirs(self.artifactdir) - self.cas = CASCache(self.repodir) + self.cas = CASCache(self.repodir, disable_exec=True) self.total_space = total_space self.free_space = free_space |