diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2020-04-20 14:57:21 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2020-04-20 14:57:21 +0000 |
commit | 3d77f1583fd020ce5c18da1fa3c01d40784c2abd (patch) | |
tree | e6887c58808b83140bbb459568c38d0591b0a23a /src | |
parent | 74bbafa4c29caff91574316fb25fe2a5f9ff5fbf (diff) | |
parent | f42205787e33174408012e5de3e3576acaab9f9b (diff) | |
download | buildstream-3d77f1583fd020ce5c18da1fa3c01d40784c2abd.tar.gz |
Merge branch 'juerg/artifact-blob-not-found' into 'master'
Fix handling of missing blobs in `ArtifactCache.pull()`
Closes #1276
See merge request BuildStream/buildstream!1843
Diffstat (limited to 'src')
-rw-r--r-- | src/buildstream/_artifact.py | 43 | ||||
-rw-r--r-- | src/buildstream/_artifactcache.py | 10 | ||||
-rw-r--r-- | src/buildstream/_cas/cascache.py | 19 | ||||
-rw-r--r-- | src/buildstream/_sourcecache.py | 6 | ||||
-rw-r--r-- | src/buildstream/sandbox/_sandboxremote.py | 7 | ||||
-rw-r--r-- | src/buildstream/storage/_casbaseddirectory.py | 2 |
6 files changed, 43 insertions, 44 deletions
diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py index 659facba4..0a70d096f 100644 --- a/src/buildstream/_artifact.py +++ b/src/buildstream/_artifact.py @@ -423,8 +423,7 @@ class Artifact: context = self._context - artifact = self._get_proto() - + artifact = self._load_proto() if not artifact: self._cached = False return False @@ -443,11 +442,14 @@ class Artifact: self._cached = False return False - # Check whether public data is available - if not self._cas.contains_file(artifact.public_data): + # Check whether public data and logs are available + logfile_digests = [logfile.digest for logfile in artifact.logs] + digests = [artifact.public_data] + logfile_digests + if not self._cas.contains_files(digests): self._cached = False return False + self._proto = artifact self._cached = True return True @@ -460,16 +462,9 @@ class Artifact: # element not cached or missing logs. # def cached_logs(self): - if not self._element._cached(): - return False - - artifact = self._get_proto() - - for logfile in artifact.logs: - if not self._cas.contains_file(logfile.digest): - return False - - return True + # Log files are currently considered an essential part of an artifact. + # If the artifact is cached, its log files are available as well. + return self._element._cached() # reset_cached() # @@ -477,6 +472,7 @@ class Artifact: # is cached or not. # def reset_cached(self): + self._proto = None self._cached = None # set_cached() @@ -485,18 +481,15 @@ class Artifact: # This is used as optimization when we know the artifact is available. # def set_cached(self): + self._proto = self._load_proto() self._cached = True - # _get_proto() + # load_proto() # # Returns: # (Artifact): Artifact proto # - def _get_proto(self): - # Check if we've already cached the proto object - if self._proto is not None: - return self._proto - + def _load_proto(self): key = self.get_extract_key() proto_path = os.path.join(self._artifactdir, self._element.get_artifact_name(key=key)) @@ -508,9 +501,15 @@ class Artifact: return None os.utime(proto_path) - # Cache the proto object - self._proto = artifact + return artifact + + # _get_proto() + # + # Returns: + # (Artifact): Artifact proto + # + def _get_proto(self): return self._proto # _get_field_digest() diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py index f1648e947..9cebeb1a3 100644 --- a/src/buildstream/_artifactcache.py +++ b/src/buildstream/_artifactcache.py @@ -22,6 +22,7 @@ import os import grpc from ._basecache import BaseCache +from ._cas.casremote import BlobNotFound from ._exceptions import ArtifactError, CASError, CacheError, CASRemoteError, RemoteError from ._protos.buildstream.v2 import buildstream_pb2, buildstream_pb2_grpc, artifact_pb2, artifact_pb2_grpc @@ -281,7 +282,6 @@ class ArtifactCache(BaseCache): element.status("Pulling artifact {} <- {}".format(display_key, remote)) artifact = self._pull_artifact_proto(element, key, remote) if artifact: - element.info("Pulled artifact {} <- {}".format(display_key, remote)) break element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key)) @@ -307,10 +307,14 @@ class ArtifactCache(BaseCache): element.status("Pulling data for artifact {} <- {}".format(display_key, remote)) if self._pull_artifact_storage(element, artifact, remote, pull_buildtrees=pull_buildtrees): - element.info("Pulled data for artifact {} <- {}".format(display_key, remote)) + element.info("Pulled artifact {} <- {}".format(display_key, remote)) return True element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key)) + except BlobNotFound as e: + # Not all blobs are available on this remote + element.info("Remote cas ({}) does not have blob {} cached".format(remote, e.blob)) + continue except CASError as e: element.warn("Could not pull from remote {}: {}".format(remote, e)) errors.append(e) @@ -401,7 +405,7 @@ class ArtifactCache(BaseCache): remote.init() # fetch_blobs() will return the blobs that are still missing - missing_blobs = self.cas.fetch_blobs(remote, missing_blobs) + missing_blobs = self.cas.fetch_blobs(remote, missing_blobs, allow_partial=True) if missing_blobs: raise ArtifactError("Blobs not found on configured artifact servers") diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py index 03be75c72..74912c4e2 100644 --- a/src/buildstream/_cas/cascache.py +++ b/src/buildstream/_cas/cascache.py @@ -165,20 +165,20 @@ class CASCache: self._casd_process_manager.release_resources(messenger) self._casd_process_manager = None - # contains_file(): + # contains_files(): # - # Check whether a digest corresponds to a file which exists in CAS + # Check whether file digests exist in the local CAS cache # # Args: # digest (Digest): The file digest to check # - # Returns: True if the file is in the cache, False otherwise + # Returns: True if the files are in the cache, False otherwise # - def contains_file(self, digest): + def contains_files(self, digests): cas = self.get_cas() request = remote_execution_pb2.FindMissingBlobsRequest() - request.blob_digests.append(digest) + request.blob_digests.extend(digests) response = cas.FindMissingBlobs(request) return len(response.missing_blob_digests) == 0 @@ -647,16 +647,19 @@ class CASCache: # fetch_blobs(): # - # Fetch blobs from remote CAS. Returns missing blobs that could not be fetched. + # Fetch blobs from remote CAS. Optionally returns missing blobs that could + # not be fetched. # # Args: # remote (CASRemote): The remote repository to fetch from # digests (list): The Digests of blobs to fetch + # allow_partial (bool): True to return missing blobs, False to raise a + # BlobNotFound error if a blob is missing # # Returns: The Digests of the blobs that were not available on the remote CAS # - def fetch_blobs(self, remote, digests): - missing_blobs = [] + def fetch_blobs(self, remote, digests, *, allow_partial=False): + missing_blobs = [] if allow_partial else None remote.init() diff --git a/src/buildstream/_sourcecache.py b/src/buildstream/_sourcecache.py index e485fbd47..4533a2580 100644 --- a/src/buildstream/_sourcecache.py +++ b/src/buildstream/_sourcecache.py @@ -242,11 +242,7 @@ class SourceCache(BaseCache): self.cas._fetch_directory(remote, source_proto.files) required_blobs = self.cas.required_blobs_for_directory(source_proto.files) missing_blobs = self.cas.local_missing_blobs(required_blobs) - missing_blobs = self.cas.fetch_blobs(remote, missing_blobs) - - if missing_blobs: - source.info("Remote cas ({}) does not have source {} cached".format(remote, display_key)) - continue + self.cas.fetch_blobs(remote, missing_blobs) source.info("Pulled source {} <- {}".format(display_key, remote)) return True diff --git a/src/buildstream/sandbox/_sandboxremote.py b/src/buildstream/sandbox/_sandboxremote.py index 3dcbb2ccc..5b03852f6 100644 --- a/src/buildstream/sandbox/_sandboxremote.py +++ b/src/buildstream/sandbox/_sandboxremote.py @@ -291,12 +291,7 @@ class SandboxRemote(SandboxREAPI): blobs_to_fetch = artifactcache.find_missing_blobs(project, local_missing_blobs) with CASRemote(self.storage_remote_spec, cascache) as casremote: - remote_missing_blobs = cascache.fetch_blobs(casremote, blobs_to_fetch) - - if remote_missing_blobs: - raise SandboxError( - "{} output files are missing on the CAS server".format(len(remote_missing_blobs)) - ) + cascache.fetch_blobs(casremote, blobs_to_fetch) def _execute_action(self, action, flags): stdout, stderr = self._get_output() diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py index b8b5ca09c..e33bdc3d7 100644 --- a/src/buildstream/storage/_casbaseddirectory.py +++ b/src/buildstream/storage/_casbaseddirectory.py @@ -754,6 +754,8 @@ class CasBasedDirectory(Directory): raise FileExistsError("{} already exists in {}".format(path[-1], str(subdir))) with utils._tempnamedfile(mode, encoding=encoding, dir=self.cas_cache.tmpdir) as f: + # Make sure the temporary file is readable by buildbox-casd + os.chmod(f.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH) yield f # Import written temporary file into CAS f.flush() |