author     Valentin David <valentin.david@codethink.co.uk>  2019-02-12 18:38:55 +0100
committer  Valentin David <valentin.david@codethink.co.uk>  2019-07-11 20:18:36 +0200
commit     3c26f0c88ff2b0477a0d28457cf15a65cb753171 (patch)
tree       d82fb3bdec12a2a00321010cd10dedd5bee054b7
parent     24426ebe31fc2ad297b352a1d332c7cf158ef5c2 (diff)
download   buildstream-valentindavid/local-cache-exec-leak.tar.gz

Deduplicate files in local cache with or without exec rights (branch: valentindavid/local-cache-exec-leak)
If we introduce an object with execution rights whose content is identical to an existing file without execution rights, the existing file should not suddenly gain execution rights. This breaks reproducibility and is easy to encounter: for example, install an empty file with execution rights, or copy a file from another artifact and `chmod +x` it.
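
The fix keys objects in the local cache by content hash and execution bit: the executable variant of a blob gets its own object file, named with an '.exec' suffix, and checkout links or copies the matching variant instead of running chmod on a fresh hardlink (which would flip the bit on the shared inode for every other checkout of that object). A minimal sketch of the naming scheme, mirroring objpath() in the patch below:

    import os

    def objpath(casdir, digest, *, is_exec=False, disable_exec=False):
        # Executable and non-executable variants of the same content are
        # distinct objects; only the '.exec' suffix tells them apart.
        filename = digest.hash[2:]
        if is_exec and not disable_exec:
            filename += '.exec'
        return os.path.join(casdir, 'objects', digest.hash[:2], filename)

The first two hex digits of the hash remain the fan-out directory, so both variants sit side by side under cas/objects/xx/.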
-rw-r--r--  src/buildstream/_cas/cascache.py               109
-rw-r--r--  src/buildstream/_cas/casremote.py                9
-rw-r--r--  src/buildstream/_cas/casserver.py                2
-rw-r--r--  src/buildstream/storage/_casbaseddirectory.py    2
-rw-r--r--  src/buildstream/storage/_filebaseddirectory.py   5
-rw-r--r--  tests/frontend/buildcheckout.py                 33
-rw-r--r--  tests/integration/shell.py                       2
-rw-r--r--  tests/testutils/artifactshare.py                 2
8 files changed, 110 insertions, 54 deletions
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 49a5e8837..1008eeff6 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -81,9 +81,10 @@ class CASCacheUsage():
#
class CASCache():
- def __init__(self, path):
+ def __init__(self, path, *, disable_exec=False):
self.casdir = os.path.join(path, 'cas')
self.tmpdir = os.path.join(path, 'tmp')
+ self._disable_exec = disable_exec
os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True)
os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True)
os.makedirs(self.tmpdir, exist_ok=True)
@@ -136,7 +137,7 @@ class CASCache():
# Optionally check presence of files
if with_files:
for filenode in directory.files:
- if not os.path.exists(self.objpath(filenode.digest)):
+ if not os.path.exists(self.objpath(filenode.digest, is_exec=filenode.is_executable)):
return False
# Check subdirectories
@@ -169,13 +170,9 @@ class CASCache():
# regular file, create hardlink
fullpath = os.path.join(dest, filenode.name)
if can_link:
- utils.safe_link(self.objpath(filenode.digest), fullpath)
+ utils.safe_link(self.objpath(filenode.digest, is_exec=filenode.is_executable), fullpath)
else:
- utils.safe_copy(self.objpath(filenode.digest), fullpath)
-
- if filenode.is_executable:
- os.chmod(fullpath, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
- stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
+ utils.safe_copy(self.objpath(filenode.digest, is_exec=filenode.is_executable), fullpath)
for dirnode in directory.directories:
fullpath = os.path.join(dest, dirnode.name)
@@ -332,8 +329,12 @@ class CASCache():
# Returns:
# (str): The path of the object
#
- def objpath(self, digest):
- return os.path.join(self.casdir, 'objects', digest.hash[:2], digest.hash[2:])
+ def objpath(self, digest, *, is_exec=False):
+ if is_exec and not self._disable_exec:
+ filename = '{}.exec'.format(digest.hash[2:])
+ else:
+ filename = digest.hash[2:]
+ return os.path.join(self.casdir, 'objects', digest.hash[:2], filename)
# add_object():
#
@@ -350,7 +351,7 @@ class CASCache():
#
# Either `path` or `buffer` must be passed, but not both.
#
- def add_object(self, *, digest=None, path=None, buffer=None, link_directly=False):
+ def add_object(self, *, digest=None, path=None, buffer=None, link_directly=False, is_exec=False):
# Exactly one of the two parameters has to be specified
assert (path is None) != (buffer is None)
@@ -369,8 +370,7 @@ class CASCache():
for chunk in iter(lambda: tmp.read(_BUFFER_SIZE), b""):
h.update(chunk)
else:
- tmp = stack.enter_context(self._temporary_object())
-
+ tmp = stack.enter_context(self._temporary_object(is_exec=is_exec))
if path:
with open(path, 'rb') as f:
for chunk in iter(lambda: f.read(_BUFFER_SIZE), b""):
@@ -386,7 +386,7 @@ class CASCache():
digest.size_bytes = os.fstat(tmp.fileno()).st_size
# Place file at final location
- objpath = self.objpath(digest)
+ objpath = self.objpath(digest, is_exec=is_exec)
os.makedirs(os.path.dirname(objpath), exist_ok=True)
os.link(tmp.name, objpath)
@@ -592,19 +592,25 @@ class CASCache():
#
def remote_missing_blobs(self, remote, blobs):
missing_blobs = dict()
+ executable = {}
+
# Limit size of FindMissingBlobs request
for required_blobs_group in _grouper(iter(blobs), 512):
request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name)
- for required_digest in required_blobs_group:
+ for is_exec, required_digest in required_blobs_group:
d = request.blob_digests.add()
d.CopyFrom(required_digest)
+ if required_digest.hash not in executable:
+ executable[required_digest.hash] = set()
+ executable[required_digest.hash].add(is_exec)
response = remote.cas.FindMissingBlobs(request)
for missing_digest in response.missing_blob_digests:
d = remote_execution_pb2.Digest()
d.CopyFrom(missing_digest)
- missing_blobs[d.hash] = d
+ for is_exec in executable[missing_digest.hash]:
+ missing_blobs[(is_exec, d.hash)] = (is_exec, d)
return missing_blobs.values()
@@ -619,10 +625,10 @@ class CASCache():
#
def local_missing_blobs(self, digests):
missing_blobs = []
- for digest in digests:
- objpath = self.objpath(digest)
+ for is_exec, digest in digests:
+ objpath = self.objpath(digest, is_exec=is_exec)
if not os.path.exists(objpath):
- missing_blobs.append(digest)
+ missing_blobs.append((is_exec, digest))
return missing_blobs
# required_blobs_for_directory():
@@ -636,7 +642,7 @@ class CASCache():
# parse directory, and recursively add blobs
- yield directory_digest
+ yield False, directory_digest
directory = remote_execution_pb2.Directory()
@@ -644,7 +650,7 @@ class CASCache():
directory.ParseFromString(f.read())
for filenode in directory.files:
- yield filenode.digest
+ yield filenode.is_executable, filenode.digest
for dirnode in directory.directories:
if dirnode.name not in excluded_subdirs:
@@ -797,9 +803,9 @@ class CASCache():
for filenode in directory.files:
if update_mtime:
- os.utime(self.objpath(filenode.digest))
+ os.utime(self.objpath(filenode.digest, is_exec=filenode.is_executable))
if check_exists:
- if not os.path.exists(self.objpath(filenode.digest)):
+ if not os.path.exists(self.objpath(filenode.digest, is_exec=filenode.is_executable)):
raise FileNotFoundError
reachable.add(filenode.digest.hash)
@@ -813,10 +819,12 @@ class CASCache():
#
# Create a named temporary file with 0o0644 access rights.
@contextlib.contextmanager
- def _temporary_object(self):
+ def _temporary_object(self, *, is_exec=False):
with utils._tempnamedfile(dir=self.tmpdir) as f:
- os.chmod(f.name,
- stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
+ access = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
+ if is_exec and not self._disable_exec:
+ access |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+ os.chmod(f.name, access)
yield f
# _ensure_blob():
@@ -830,27 +838,27 @@ class CASCache():
# Returns:
# (str): The path of the object
#
- def _ensure_blob(self, remote, digest):
- objpath = self.objpath(digest)
+ def _ensure_blob(self, remote, digest, is_exec=False):
+ objpath = self.objpath(digest, is_exec=is_exec)
if os.path.exists(objpath):
# already in local repository
return objpath
- with self._temporary_object() as f:
+ with self._temporary_object(is_exec=is_exec) as f:
remote._fetch_blob(digest, f)
- added_digest = self.add_object(path=f.name, link_directly=True)
+ added_digest = self.add_object(path=f.name, link_directly=True, is_exec=is_exec)
assert added_digest.hash == digest.hash
return objpath
def _batch_download_complete(self, batch, *, missing_blobs=None):
- for digest, data in batch.send(missing_blobs=missing_blobs):
- with self._temporary_object() as f:
+ for digest, data, is_exec in batch.send(missing_blobs=missing_blobs):
+ with self._temporary_object(is_exec=is_exec) as f:
f.write(data)
f.flush()
- added_digest = self.add_object(path=f.name, link_directly=True)
+ added_digest = self.add_object(path=f.name, link_directly=True, is_exec=is_exec)
assert added_digest.hash == digest.hash
# Helper function for _fetch_directory().
@@ -864,8 +872,9 @@ class CASCache():
return _CASBatchRead(remote)
# Helper function for _fetch_directory().
- def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue, *, recursive=False):
- in_local_cache = os.path.exists(self.objpath(digest))
+ def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue,
+ *, recursive=False, is_exec=False):
+ in_local_cache = os.path.exists(self.objpath(digest, is_exec=is_exec))
if in_local_cache:
# Skip download, already in local cache.
@@ -873,14 +882,14 @@ class CASCache():
elif (digest.size_bytes >= remote.max_batch_total_size_bytes or
not remote.batch_read_supported):
# Too large for batch request, download in independent request.
- self._ensure_blob(remote, digest)
+ self._ensure_blob(remote, digest, is_exec=is_exec)
in_local_cache = True
else:
- if not batch.add(digest):
+ if not batch.add(digest, is_exec=is_exec):
# Not enough space left in batch request.
# Complete pending batch first.
batch = self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
- batch.add(digest)
+ batch.add(digest, is_exec=is_exec)
if recursive:
if in_local_cache:
@@ -963,25 +972,31 @@ class CASCache():
batch = _CASBatchRead(remote)
- for digest in digests:
+ for d in digests:
+ try:
+ is_exec, digest = d
+ except TypeError:
+ digest = d
+ is_exec = False
+
if (digest.size_bytes >= remote.max_batch_total_size_bytes or
not remote.batch_read_supported):
# Too large for batch request, download in independent request.
try:
- self._ensure_blob(remote, digest)
+ self._ensure_blob(remote, digest, is_exec=is_exec)
except grpc.RpcError as e:
if e.code() == grpc.StatusCode.NOT_FOUND:
missing_blobs.append(digest)
else:
raise CASCacheError("Failed to fetch blob: {}".format(e)) from e
else:
- if not batch.add(digest):
+ if not batch.add(digest, is_exec=is_exec):
# Not enough space left in batch request.
# Complete pending batch first.
self._batch_download_complete(batch, missing_blobs=missing_blobs)
batch = _CASBatchRead(remote)
- batch.add(digest)
+ batch.add(digest, is_exec=is_exec)
# Complete last pending batch
self._batch_download_complete(batch, missing_blobs=missing_blobs)
@@ -999,8 +1014,14 @@ class CASCache():
def send_blobs(self, remote, digests, u_uid=uuid.uuid4()):
batch = _CASBatchUpdate(remote)
- for digest in digests:
- with open(self.objpath(digest), 'rb') as f:
+ for d in digests:
+ try:
+ is_exec, digest = d
+ except TypeError:
+ digest = d
+ is_exec = False
+
+ with open(self.objpath(digest, is_exec=is_exec), 'rb') as f:
assert os.fstat(f.fileno()).st_size == digest.size_bytes
if (digest.size_bytes >= remote.max_batch_total_size_bytes or
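
Blob enumeration changes shape throughout this file: required_blobs_for_directory() now yields (is_exec, digest) pairs, and remote_missing_blobs() records every exec variant a hash was requested with. The per-hash value is a set because the same content can appear both with and without execution rights in one tree, in which case a missing blob must be fetched once per variant. A tiny sketch of the bookkeeping, equivalent to the loop in remote_missing_blobs() (hypothetical hash):

    executable = {}
    for is_exec, hash_ in [(False, 'abc123'), (True, 'abc123')]:
        executable.setdefault(hash_, set()).add(is_exec)
    assert executable['abc123'] == {False, True}  # fetch both variants

The try/except TypeError unpacking in fetch_blobs() and send_blobs() keeps callers that still pass bare digests working; those default to the non-executable variant.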
diff --git a/src/buildstream/_cas/casremote.py b/src/buildstream/_cas/casremote.py
index cd46e9c38..b461d4eef 100644
--- a/src/buildstream/_cas/casremote.py
+++ b/src/buildstream/_cas/casremote.py
@@ -310,8 +310,9 @@ class _CASBatchRead():
self._request.instance_name = remote.instance_name
self._size = 0
self._sent = False
+ self._is_exec = {}
- def add(self, digest):
+ def add(self, digest, *, is_exec=False):
assert not self._sent
new_batch_size = self._size + digest.size_bytes
@@ -323,6 +324,9 @@ class _CASBatchRead():
request_digest.hash = digest.hash
request_digest.size_bytes = digest.size_bytes
self._size = new_batch_size
+ if digest.hash not in self._is_exec:
+ self._is_exec[digest.hash] = set()
+ self._is_exec[digest.hash].add(is_exec)
return True
def send(self, *, missing_blobs=None):
@@ -349,7 +353,8 @@ class _CASBatchRead():
raise CASRemoteError("Failed to download blob {}: expected {} bytes, received {} bytes".format(
response.digest.hash, response.digest.size_bytes, len(response.data)))
- yield (response.digest, response.data)
+ for is_exec in self._is_exec[response.digest.hash]:
+ yield (response.digest, response.data, is_exec)
# Represents a batch of blobs queued for upload.
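
On the receiving side, _CASBatchRead.send() now yields one (digest, data, is_exec) triple per distinct exec flag recorded for a digest, so _batch_download_complete() in cascache.py can write the payload to every object path it is needed under while the data crosses the wire only once:

    # If a hash was add()ed with is_exec=False and again with is_exec=True,
    # send() yields the same payload under both flags:
    #   (digest, data, False) -> cas/objects/xx/yyyy...
    #   (digest, data, True)  -> cas/objects/xx/yyyy....exec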
diff --git a/src/buildstream/_cas/casserver.py b/src/buildstream/_cas/casserver.py
index 9606c263b..582ac2ae6 100644
--- a/src/buildstream/_cas/casserver.py
+++ b/src/buildstream/_cas/casserver.py
@@ -64,7 +64,7 @@ class ArtifactTooLargeException(Exception):
def create_server(repo, *, enable_push,
max_head_size=int(10e9),
min_head_size=int(2e9)):
- cas = CASCache(os.path.abspath(repo))
+ cas = CASCache(os.path.abspath(repo), disable_exec=True)
artifactdir = os.path.join(os.path.abspath(repo), 'artifacts', 'refs')
sourcedir = os.path.join(os.path.abspath(repo), 'source_protos')
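
The server-side cache opts out of the variant split (disable_exec=True): CAS blobs on the wire are addressed purely by digest, and the exec bit travels in the is_executable field of the directory protos, so the server only ever needs one object per hash. The ArtifactShare test fixture below is updated for the same reason.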
diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py
index 2c5d751e4..b04075585 100644
--- a/src/buildstream/storage/_casbaseddirectory.py
+++ b/src/buildstream/storage/_casbaseddirectory.py
@@ -136,8 +136,8 @@ class CasBasedDirectory(Directory):
entry = IndexEntry(filename, _FileType.REGULAR_FILE,
modified=modified or filename in self.index)
path = os.path.join(basename, filename)
- entry.digest = self.cas_cache.add_object(path=path, link_directly=can_link)
entry.is_executable = os.access(path, os.X_OK)
+ entry.digest = self.cas_cache.add_object(path=path, link_directly=can_link, is_exec=entry.is_executable)
self.index[filename] = entry
self.__invalidate_digest()
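
The reordering here is deliberate: is_executable must be known before add_object() runs so that the object is created at the correct variant path in the first place.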
diff --git a/src/buildstream/storage/_filebaseddirectory.py b/src/buildstream/storage/_filebaseddirectory.py
index 8c55819c9..4f93737c0 100644
--- a/src/buildstream/storage/_filebaseddirectory.py
+++ b/src/buildstream/storage/_filebaseddirectory.py
@@ -272,11 +272,8 @@ class FileBasedDirectory(Directory):
continue
if entry.type == _FileType.REGULAR_FILE:
- src_path = source_directory.cas_cache.objpath(entry.digest)
+ src_path = source_directory.cas_cache.objpath(entry.digest, is_exec=entry.is_executable)
actionfunc(src_path, dest_path, result=result)
- if entry.is_executable:
- os.chmod(dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
- stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
else:
assert entry.type == _FileType.SYMLINK
os.symlink(entry.target, dest_path)
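
The post-copy chmod disappears for the same reason as in cascache.py: the exec bit is now baked into the cached object itself, so linking or copying the correct variant is sufficient and nothing has to (or gets to) modify permissions on a possibly shared inode afterwards.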
diff --git a/tests/frontend/buildcheckout.py b/tests/frontend/buildcheckout.py
index 97bce91a7..ccd284cdb 100644
--- a/tests/frontend/buildcheckout.py
+++ b/tests/frontend/buildcheckout.py
@@ -4,6 +4,8 @@
import os
import tarfile
import hashlib
+import shutil
+import stat
import subprocess
import re
@@ -885,3 +887,34 @@ def test_partial_checkout_fail(tmpdir, datafiles, cli):
checkout_dir])
res.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt')
assert re.findall(r'Remote \((\S+)\) does not have artifact (\S+) cached', res.stderr)
+
+
+@pytest.mark.datafiles(DATA_DIR)
+def test_access_rights(datafiles, cli):
+ project = str(datafiles)
+ checkout = os.path.join(cli.directory, 'checkout')
+
+ shutil.copyfile(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello'),
+ os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello-2'))
+ os.chmod(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello'),
+ 0o0755)
+ os.chmod(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello-2'),
+ 0o0644)
+
+ result = cli.run(project=project, args=['build', 'target.bst'])
+ result.assert_success()
+
+ checkout_args = ['artifact', 'checkout', 'target.bst',
+ '--directory', checkout]
+
+ # Now check it out
+ result = cli.run(project=project, args=checkout_args)
+ result.assert_success()
+
+ st = os.lstat(os.path.join(checkout, 'usr', 'bin', 'hello'))
+ assert stat.S_ISREG(st.st_mode)
+ assert stat.S_IMODE(st.st_mode) == 0o0755
+
+ st = os.lstat(os.path.join(checkout, 'usr', 'bin', 'hello-2'))
+ assert stat.S_ISREG(st.st_mode)
+ assert stat.S_IMODE(st.st_mode) == 0o0644
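
Since hello-2 is a byte-for-byte copy of hello, this test pins down exactly the leak described in the commit message: deduplication must not merge the two files' modes. A quick way to inspect the resulting cache layout (an illustrative sketch, assuming the default sha256 digests and the cas/objects layout used in shell.py below):

    import hashlib
    import os

    def object_variants(cachedir, payload):
        # Same content hashes identically; the '.exec' suffix is the only
        # thing separating the executable variant from the plain one.
        h = hashlib.sha256(payload).hexdigest()
        base = os.path.join(cachedir, 'cas', 'objects', h[:2])
        return os.path.join(base, h[2:]), os.path.join(base, h[2:] + '.exec')

After the build in this test, both paths should exist side by side.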
diff --git a/tests/integration/shell.py b/tests/integration/shell.py
index f7de3e462..b55eb7b03 100644
--- a/tests/integration/shell.py
+++ b/tests/integration/shell.py
@@ -430,7 +430,7 @@ def test_integration_partial_artifact(cli, datafiles, tmpdir, integration_cache)
# Remove the binary from the CAS
cachedir = cli.config['cachedir']
- objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], digest[2:])
+ objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], '{}.exec'.format(digest[2:]))
os.unlink(objpath)
# check shell doesn't work
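
Before this patch the executable hello binary was stored at the bare hash path; its only local copy now carries the '.exec' suffix, so simulating a partial artifact means unlinking that variant instead.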
diff --git a/tests/testutils/artifactshare.py b/tests/testutils/artifactshare.py
index a5522c8eb..965a4a924 100644
--- a/tests/testutils/artifactshare.py
+++ b/tests/testutils/artifactshare.py
@@ -46,7 +46,7 @@ class ArtifactShare():
self.artifactdir = os.path.join(self.repodir, 'artifacts', 'refs')
os.makedirs(self.artifactdir)
- self.cas = CASCache(self.repodir)
+ self.cas = CASCache(self.repodir, disable_exec=True)
self.total_space = total_space
self.free_space = free_space