author    bst-marge-bot <marge-bot@buildstream.build>  2020-04-20 14:57:21 +0000
committer bst-marge-bot <marge-bot@buildstream.build>  2020-04-20 14:57:21 +0000
commit    3d77f1583fd020ce5c18da1fa3c01d40784c2abd (patch)
tree      e6887c58808b83140bbb459568c38d0591b0a23a /src
parent    74bbafa4c29caff91574316fb25fe2a5f9ff5fbf (diff)
parent    f42205787e33174408012e5de3e3576acaab9f9b (diff)
Merge branch 'juerg/artifact-blob-not-found' into 'master'

Fix handling of missing blobs in `ArtifactCache.pull()`

Closes #1276

See merge request BuildStream/buildstream!1843
Diffstat (limited to 'src')
-rw-r--r--  src/buildstream/_artifact.py                   | 43
-rw-r--r--  src/buildstream/_artifactcache.py              | 10
-rw-r--r--  src/buildstream/_cas/cascache.py               | 19
-rw-r--r--  src/buildstream/_sourcecache.py                |  6
-rw-r--r--  src/buildstream/sandbox/_sandboxremote.py      |  7
-rw-r--r--  src/buildstream/storage/_casbaseddirectory.py  |  2
6 files changed, 43 insertions(+), 44 deletions(-)
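
In outline, the merge makes a missing blob on one remote a soft failure: ArtifactCache.pull() catches the new BlobNotFound error and moves on to the next configured remote instead of aborting. A minimal sketch of that loop, assuming a fetch(remote) callable that stands in for the proto and blob download steps shown in the diff below:

    class BlobNotFound(Exception):
        # Mirrors the casremote exception used in the diff; carries the digest.
        def __init__(self, blob, msg):
            super().__init__(msg)
            self.blob = blob

    def pull(remotes, fetch):
        # fetch(remote) pulls proto and blobs, raising BlobNotFound on a gap.
        for remote in remotes:
            try:
                if fetch(remote):
                    return True
                print("Remote ({}) does not have the artifact cached".format(remote))
            except BlobNotFound as e:
                # Not all blobs are available on this remote; try the next one.
                print("Remote cas ({}) does not have blob {} cached".format(remote, e.blob))
        return False
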
diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index 659facba4..0a70d096f 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -423,8 +423,7 @@ class Artifact:
context = self._context
- artifact = self._get_proto()
-
+ artifact = self._load_proto()
if not artifact:
self._cached = False
return False
@@ -443,11 +442,14 @@ class Artifact:
self._cached = False
return False
- # Check whether public data is available
- if not self._cas.contains_file(artifact.public_data):
+ # Check whether public data and logs are available
+ logfile_digests = [logfile.digest for logfile in artifact.logs]
+ digests = [artifact.public_data] + logfile_digests
+ if not self._cas.contains_files(digests):
self._cached = False
return False
+ self._proto = artifact
self._cached = True
return True
@@ -460,16 +462,9 @@ class Artifact:
# element not cached or missing logs.
#
def cached_logs(self):
- if not self._element._cached():
- return False
-
- artifact = self._get_proto()
-
- for logfile in artifact.logs:
- if not self._cas.contains_file(logfile.digest):
- return False
-
- return True
+ # Log files are currently considered an essential part of an artifact.
+ # If the artifact is cached, its log files are available as well.
+ return self._element._cached()
# reset_cached()
#
@@ -477,6 +472,7 @@ class Artifact:
# is cached or not.
#
def reset_cached(self):
+ self._proto = None
self._cached = None
# set_cached()
@@ -485,18 +481,15 @@ class Artifact:
# This is used as optimization when we know the artifact is available.
#
def set_cached(self):
+ self._proto = self._load_proto()
self._cached = True
- # _get_proto()
+ # _load_proto()
#
# Returns:
# (Artifact): Artifact proto
#
- def _get_proto(self):
- # Check if we've already cached the proto object
- if self._proto is not None:
- return self._proto
-
+ def _load_proto(self):
key = self.get_extract_key()
proto_path = os.path.join(self._artifactdir, self._element.get_artifact_name(key=key))
@@ -508,9 +501,15 @@ class Artifact:
return None
os.utime(proto_path)
- # Cache the proto object
- self._proto = artifact
+ return artifact
+
+ # _get_proto()
+ #
+ # Returns:
+ # (Artifact): Artifact proto
+ #
+ def _get_proto(self):
return self._proto
# _get_field_digest()
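
Taken together, the _artifact.py hunks separate loading from memoization: _load_proto() always reads the proto from disk, and the proto is only stored on self._proto once cached() or set_cached() has established that every referenced blob is present. A condensed sketch of the resulting shape, with file I/O reduced to a stub and the CAS digest checks elided:

    import os

    class ArtifactSketch:
        def __init__(self, proto_path):
            self._proto_path = proto_path
            self._proto = None
            self._cached = None

        def _load_proto(self):
            # Always read from disk; no memoization at this level.
            if not os.path.isfile(self._proto_path):
                return None
            with open(self._proto_path, "rb") as f:
                return f.read()  # stand-in for parsing an Artifact proto

        def cached(self):
            artifact = self._load_proto()
            if artifact is None:
                self._cached = False
                return False
            # (The real method also verifies files, public data and log
            # blobs in CAS before declaring the artifact cached.)
            self._proto = artifact  # memoize only once known complete
            self._cached = True
            return True

        def reset_cached(self):
            self._proto = None
            self._cached = None

        def _get_proto(self):
            # The memoized proto; None until cached()/set_cached() ran.
            return self._proto
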
diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index f1648e947..9cebeb1a3 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -22,6 +22,7 @@ import os
import grpc
from ._basecache import BaseCache
+from ._cas.casremote import BlobNotFound
from ._exceptions import ArtifactError, CASError, CacheError, CASRemoteError, RemoteError
from ._protos.buildstream.v2 import buildstream_pb2, buildstream_pb2_grpc, artifact_pb2, artifact_pb2_grpc
@@ -281,7 +282,6 @@ class ArtifactCache(BaseCache):
element.status("Pulling artifact {} <- {}".format(display_key, remote))
artifact = self._pull_artifact_proto(element, key, remote)
if artifact:
- element.info("Pulled artifact {} <- {}".format(display_key, remote))
break
element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
@@ -307,10 +307,14 @@ class ArtifactCache(BaseCache):
element.status("Pulling data for artifact {} <- {}".format(display_key, remote))
if self._pull_artifact_storage(element, artifact, remote, pull_buildtrees=pull_buildtrees):
- element.info("Pulled data for artifact {} <- {}".format(display_key, remote))
+ element.info("Pulled artifact {} <- {}".format(display_key, remote))
return True
element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
+ except BlobNotFound as e:
+ # Not all blobs are available on this remote
+ element.info("Remote cas ({}) does not have blob {} cached".format(remote, e.blob))
+ continue
except CASError as e:
element.warn("Could not pull from remote {}: {}".format(remote, e))
errors.append(e)
@@ -401,7 +405,7 @@ class ArtifactCache(BaseCache):
remote.init()
# fetch_blobs() will return the blobs that are still missing
- missing_blobs = self.cas.fetch_blobs(remote, missing_blobs)
+ missing_blobs = self.cas.fetch_blobs(remote, missing_blobs, allow_partial=True)
if missing_blobs:
raise ArtifactError("Blobs not found on configured artifact servers")
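
Caller-side, the two fetch_blobs() modes used here look as follows. This is a usage sketch only; the cas, remote and blobs arguments are assumed to be set up as in the surrounding code:

    def fetch_all_or_report(cas, remote, blobs):
        # allow_partial=True: digests the remote lacks come back as a list.
        return cas.fetch_blobs(remote, blobs, allow_partial=True)

    def fetch_all_or_raise(cas, remote, blobs):
        # Default allow_partial=False: the first unavailable blob raises
        # BlobNotFound, which pull() above turns into "try the next remote".
        cas.fetch_blobs(remote, blobs)
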
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 03be75c72..74912c4e2 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -165,20 +165,20 @@ class CASCache:
self._casd_process_manager.release_resources(messenger)
self._casd_process_manager = None
- # contains_file():
+ # contains_files():
#
- # Check whether a digest corresponds to a file which exists in CAS
+ # Check whether file digests exist in the local CAS cache
#
# Args:
# digests (list): The file digests to check
#
- # Returns: True if the file is in the cache, False otherwise
+ # Returns: True if the files are in the cache, False otherwise
#
- def contains_file(self, digest):
+ def contains_files(self, digests):
cas = self.get_cas()
request = remote_execution_pb2.FindMissingBlobsRequest()
- request.blob_digests.append(digest)
+ request.blob_digests.extend(digests)
response = cas.FindMissingBlobs(request)
return len(response.missing_blob_digests) == 0
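
contains_files() batches the availability check into a single FindMissingBlobs round trip instead of one RPC per digest. A sketch against the REAPI protos; the vendored import path and the cas_stub parameter are assumptions:

    from buildstream._protos.build.bazel.remote.execution.v2 import remote_execution_pb2

    def contains_files(cas_stub, digests):
        # One FindMissingBlobs call covers public data and all log files.
        request = remote_execution_pb2.FindMissingBlobsRequest()
        request.blob_digests.extend(digests)
        response = cas_stub.FindMissingBlobs(request)
        # Everything is present exactly when nothing is reported missing.
        return len(response.missing_blob_digests) == 0
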
@@ -647,16 +647,19 @@ class CASCache:
# fetch_blobs():
#
- # Fetch blobs from remote CAS. Returns missing blobs that could not be fetched.
+ # Fetch blobs from remote CAS. Optionally returns missing blobs that could
+ # not be fetched.
#
# Args:
# remote (CASRemote): The remote repository to fetch from
# digests (list): The Digests of blobs to fetch
+ # allow_partial (bool): True to return missing blobs, False to raise a
+ # BlobNotFound error if a blob is missing
#
# Returns: The Digests of the blobs that were not available on the remote CAS
#
- def fetch_blobs(self, remote, digests):
- missing_blobs = []
+ def fetch_blobs(self, remote, digests, *, allow_partial=False):
+ missing_blobs = [] if allow_partial else None
remote.init()
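
Inside fetch_blobs(), the missing_blobs sentinel drives both behaviours: a list collects gaps when allow_partial is True, while None means raise immediately. Since the hunk is truncated here, the following sketch fills in the control flow with hypothetical remote_has() and download_blob() helpers:

    class BlobNotFound(Exception):
        def __init__(self, blob, msg):
            super().__init__(msg)
            self.blob = blob

    def remote_has(remote, digest):
        # Hypothetical availability probe; the real code batches via gRPC.
        return False

    def download_blob(remote, digest):
        # Hypothetical single-blob transfer helper.
        pass

    def fetch_blobs(remote, digests, *, allow_partial=False):
        missing_blobs = [] if allow_partial else None
        for digest in digests:
            if not remote_has(remote, digest):
                if not allow_partial:
                    raise BlobNotFound(digest, "blob {} not found on remote".format(digest))
                missing_blobs.append(digest)
                continue
            download_blob(remote, digest)
        return missing_blobs
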
diff --git a/src/buildstream/_sourcecache.py b/src/buildstream/_sourcecache.py
index e485fbd47..4533a2580 100644
--- a/src/buildstream/_sourcecache.py
+++ b/src/buildstream/_sourcecache.py
@@ -242,11 +242,7 @@ class SourceCache(BaseCache):
self.cas._fetch_directory(remote, source_proto.files)
required_blobs = self.cas.required_blobs_for_directory(source_proto.files)
missing_blobs = self.cas.local_missing_blobs(required_blobs)
- missing_blobs = self.cas.fetch_blobs(remote, missing_blobs)
-
- if missing_blobs:
- source.info("Remote cas ({}) does not have source {} cached".format(remote, display_key))
- continue
+ self.cas.fetch_blobs(remote, missing_blobs)
source.info("Pulled source {} <- {}".format(display_key, remote))
return True
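
The retained lines implement a common completion pattern: enumerate every blob a directory tree references, subtract what the local CAS already holds, and fetch only the gap. Sketched below with the method names from this hunk; the wrapper function itself is hypothetical:

    def complete_directory(cas, remote, directory_digest):
        # Every blob the directory tree references, recursively.
        required = cas.required_blobs_for_directory(directory_digest)
        # Narrow to those absent from the local cache.
        missing = cas.local_missing_blobs(required)
        # Strict fetch: raises BlobNotFound if the remote lacks any of them,
        # replacing the explicit missing-blob check removed above.
        cas.fetch_blobs(remote, missing)
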
diff --git a/src/buildstream/sandbox/_sandboxremote.py b/src/buildstream/sandbox/_sandboxremote.py
index 3dcbb2ccc..5b03852f6 100644
--- a/src/buildstream/sandbox/_sandboxremote.py
+++ b/src/buildstream/sandbox/_sandboxremote.py
@@ -291,12 +291,7 @@ class SandboxRemote(SandboxREAPI):
blobs_to_fetch = artifactcache.find_missing_blobs(project, local_missing_blobs)
with CASRemote(self.storage_remote_spec, cascache) as casremote:
- remote_missing_blobs = cascache.fetch_blobs(casremote, blobs_to_fetch)
-
- if remote_missing_blobs:
- raise SandboxError(
- "{} output files are missing on the CAS server".format(len(remote_missing_blobs))
- )
+ cascache.fetch_blobs(casremote, blobs_to_fetch)
def _execute_action(self, action, flags):
stdout, stderr = self._get_output()
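
This is the same simplification as in _sourcecache.py: rather than counting what fetch_blobs() returns and raising SandboxError manually, the call site lets the default strict mode raise BlobNotFound on the first gap. Schematically, with a hypothetical wrapper:

    def fetch_sandbox_outputs(cascache, casremote, blobs_to_fetch):
        # Previously: inspect the returned list and raise SandboxError here.
        #   remote_missing = cascache.fetch_blobs(casremote, blobs_to_fetch)
        #   if remote_missing:
        #       raise SandboxError(...)
        # Now: the default strict mode raises BlobNotFound for us.
        cascache.fetch_blobs(casremote, blobs_to_fetch)
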
diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py
index b8b5ca09c..e33bdc3d7 100644
--- a/src/buildstream/storage/_casbaseddirectory.py
+++ b/src/buildstream/storage/_casbaseddirectory.py
@@ -754,6 +754,8 @@ class CasBasedDirectory(Directory):
raise FileExistsError("{} already exists in {}".format(path[-1], str(subdir)))
with utils._tempnamedfile(mode, encoding=encoding, dir=self.cas_cache.tmpdir) as f:
+ # Make sure the temporary file is readable by buildbox-casd
+ os.chmod(f.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
yield f
# Import written temporary file into CAS
f.flush()
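
The chmod matters because tempfile creates files with mode 0o600, which buildbox-casd cannot read when it runs as a different user. A standalone sketch of the pattern on POSIX:

    import os
    import stat
    import tempfile

    with tempfile.NamedTemporaryFile(mode="w+b") as f:
        # NamedTemporaryFile creates the file 0o600; open it up to 0o644 so
        # another local process (e.g. buildbox-casd) can read it.
        os.chmod(f.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
        f.write(b"example content")
        f.flush()
        # Hand f.name to the other process before the context manager
        # deletes the file on exit.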