diff options
-rw-r--r-- | buildstream/_artifactcache/artifactcache.py | 60 | ||||
-rw-r--r-- | buildstream/_artifactcache/ostreecache.py | 372 | ||||
-rw-r--r-- | buildstream/_artifactcache/tarcache.py | 152 | ||||
-rw-r--r-- | tests/artifactcache/tar.py | 6 |
4 files changed, 251 insertions, 339 deletions
diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py index b10b6b0b9..8757094e6 100644 --- a/buildstream/_artifactcache/artifactcache.py +++ b/buildstream/_artifactcache/artifactcache.py @@ -19,6 +19,7 @@ # Tristan Maat <tristan.maat@codethink.co.uk> import os +import string from collections import Mapping, namedtuple from .._exceptions import ImplError, LoadError, LoadErrorReason @@ -35,8 +36,13 @@ from .. import _yaml # in addition to pulling from it. # class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push')): + + # _new_from_config_node + # + # Creates an ArtifactCacheSpec() from a YAML loaded node + # @staticmethod - def new_from_config_node(spec_node): + def _new_from_config_node(spec_node): _yaml.node_validate(spec_node, ['url', 'push']) url = _yaml.node_get(spec_node, str, 'url') push = _yaml.node_get(spec_node, bool, 'push', default_value=False) @@ -65,10 +71,10 @@ def artifact_cache_specs_from_config_node(config_node): artifacts = config_node.get('artifacts', []) if isinstance(artifacts, Mapping): - cache_specs.append(ArtifactCacheSpec.new_from_config_node(artifacts)) + cache_specs.append(ArtifactCacheSpec._new_from_config_node(artifacts)) elif isinstance(artifacts, list): for spec_node in artifacts: - cache_specs.append(ArtifactCacheSpec.new_from_config_node(spec_node)) + cache_specs.append(ArtifactCacheSpec._new_from_config_node(spec_node)) else: provenance = _yaml.node_get_provenance(config_node, key='artifacts') raise _yaml.LoadError(_yaml.LoadErrorReason.INVALID_DATA, @@ -104,16 +110,50 @@ def configured_remote_artifact_cache_specs(context, project): # class ArtifactCache(): def __init__(self, context): - self.context = context - - os.makedirs(context.artifactdir, exist_ok=True) self.extractdir = os.path.join(context.artifactdir, 'extract') - - self._local = False self.global_remote_specs = [] self.project_remote_specs = {} + self._local = False + + os.makedirs(context.artifactdir, exist_ok=True) + + ################################################ + # Methods implemented on the abstract class # + ################################################ + + # get_artifact_fullname() + # + # Generate a full name for an artifact, including the + # project namespace, element name and cache key. + # + # This can also be used as a relative path safely, and + # will normalize parts of the element name such that only + # digits, letters and some select characters are allowed. + # + # Args: + # element (Element): The Element object + # key (str): The element's cache key + # + # Returns: + # (str): The relative path for the artifact + # + def get_artifact_fullname(self, element, key): + project = element._get_project() + + # Normalize ostree ref unsupported chars + valid_chars = string.digits + string.ascii_letters + '-._' + element_name = ''.join([ + x if x in valid_chars else '_' + for x in element.normal_name + ]) + + assert key is not None + + # assume project and element names are not allowed to contain slashes + return '{0}/{1}/{2}'.format(project.name, element_name, key) + # set_remotes(): # # Set the list of remote caches. If project is None, the global list of @@ -130,6 +170,10 @@ class ArtifactCache(): else: self.project_remote_specs[project] = remote_specs + ################################################ + # Abstract methods for subclasses to implement # + ################################################ + # initialize_remotes(): # # This will contact each remote cache. diff --git a/buildstream/_artifactcache/ostreecache.py b/buildstream/_artifactcache/ostreecache.py index 29f6c0b8c..182b3f9c6 100644 --- a/buildstream/_artifactcache/ostreecache.py +++ b/buildstream/_artifactcache/ostreecache.py @@ -21,7 +21,6 @@ import multiprocessing import os import signal -import string import tempfile from .. import _ostree, _signals, utils @@ -34,53 +33,6 @@ from .pushreceive import push as push_artifact from .pushreceive import PushException -def buildref(element, key): - project = element._get_project() - - # Normalize ostree ref unsupported chars - valid_chars = string.digits + string.ascii_letters + '-._' - element_name = ''.join([ - x if x in valid_chars else '_' - for x in element.normal_name - ]) - - assert key is not None - - # assume project and element names are not allowed to contain slashes - return '{0}/{1}/{2}'.format(project.name, element_name, key) - - -# Represents a single remote OSTree cache. -# -class _OSTreeRemote(): - def __init__(self, spec, pull_url, push_url): - self.spec = spec - self.pull_url = pull_url - self.push_url = push_url - - -# Maps artifacts to the remotes that contain them. -# -class _OSTreeArtifactMap(): - def __init__(self): - self._ref_to_remotes = {} - - def append(self, ref, remote): - if ref in self._ref_to_remotes: - self._ref_to_remotes[ref].append(remote) - else: - self._ref_to_remotes[ref] = [remote] - - def lookup(self, ref): - return self._ref_to_remotes.get(ref, []) - - def lookup_first(self, ref): - return self._ref_to_remotes.get(ref, [])[0] - - def contains(self, ref): - return ref in self._ref_to_remotes - - # An OSTreeCache manages artifacts in an OSTree repository # # Args: @@ -109,6 +61,9 @@ class OSTreeCache(ArtifactCache): self._has_fetch_remotes = False self._has_push_remotes = False + ################################################ + # Implementation of abstract methods # + ################################################ def has_fetch_remotes(self): return self._has_fetch_remotes @@ -124,100 +79,24 @@ class OSTreeCache(ArtifactCache): remotes_for_project = self._remotes[element._get_project()] return any(remote.spec.push for remote in remotes_for_project) - # contains(): - # - # Check whether the artifact for the specified Element is already available - # in the local artifact cache. - # - # Args: - # element (Element): The Element to check - # key (str): The cache key to use - # - # Returns: True if the artifact is in the cache, False otherwise - # def contains(self, element, key): - ref = buildref(element, key) + ref = self.get_artifact_fullname(element, key) return _ostree.exists(self.repo, ref) - # remotes_containing_key(): - # - # Return every remote cache that contains the key. The result will be an - # ordered list of remotes. - # - # Args: - # element (Element): The Element to check - # key (str): The key to use - # - # Returns (list): A list of _OSTreeRemote instances. - # - def remotes_containing_key(self, element, key): - if not self._has_fetch_remotes: - return [] - - artifact_map = self._artifact_maps[element._get_project()] - ref = buildref(element, key) - return artifact_map.lookup(ref) - - # remote_contains(): - # - # Check whether the artifact for the specified Element is already available - # in the remote artifact cache. - # - # Args: - # element (Element): The Element to check - # key (str): The cache key to use - # - # Returns: True if the artifact is in a cache, False otherwise - # def remote_contains(self, element, key): - remotes = self.remotes_containing_key(element, key) + remotes = self._remotes_containing_key(element, key) return len(remotes) > 0 - # push_needed(): - # - # Check whether an artifact for the specified Element needs to be pushed to - # any of the configured push remotes. The policy is to push every artifact - # we build to every configured push remote, so this should only return False - # if all of the configured push remotes already contain the given artifact. - # - # This function checks for presence of the artifact only using its strong - # key. The presence of the weak key in a cache does not necessarily indicate - # that this particular artifact is present, only that there is a - # partially-compatible version available. - # - # Args: - # element (Element): The Element to check - # key (str): The cache key to use - # - # Returns: False if all the push remotes have the artifact, True otherwise - # def push_needed(self, element, key): - remotes_with_artifact = self.remotes_containing_key(element, key) + remotes_with_artifact = self._remotes_containing_key(element, key) push_remotes_with_artifact = set(r for r in remotes_with_artifact if r.spec.push) push_remotes_for_project = set(self._remotes[element._get_project()]) return not push_remotes_for_project.issubset(push_remotes_with_artifact) - # extract(): - # - # Extract cached artifact for the specified Element if it hasn't - # already been extracted. - # - # Assumes artifact has previously been fetched or committed. - # - # Args: - # element (Element): The Element to extract - # key (str): The cache key to use - # - # Raises: - # ArtifactError: In cases there was an OSError, or if the artifact - # did not exist. - # - # Returns: path to extracted artifact - # def extract(self, element, key): - ref = buildref(element, key) + ref = self.get_artifact_fullname(element, key) # resolve ref to checksum rev = _ostree.checksum(self.repo, ref) @@ -252,46 +131,20 @@ class OSTreeCache(ArtifactCache): return dest - # commit(): - # - # Commit built artifact to cache. - # - # Args: - # element (Element): The Element commit an artifact for - # content (str): The element's content directory - # keys (list): The cache keys to use - # def commit(self, element, content, keys): - refs = [buildref(element, key) for key in keys] + refs = [self.get_artifact_fullname(element, key) for key in keys] try: _ostree.commit(self.repo, content, refs) except OSTreeError as e: raise ArtifactError("Failed to commit artifact: {}".format(e)) from e - # can_diff(): - # - # Whether this cache implementation can diff (unfortunately - # there's no way to tell if an implementation is going to throw - # ImplError without abc). - # def can_diff(self): return True - # diff(): - # - # Return a list of files that have been added or modified between - # the artifacts described by key_a and key_b. - # - # Args: - # element (Element): The element whose artifacts to compare - # key_a (str): The first artifact key - # key_b (str): The second artifact key - # subdir (str): A subdirectory to limit the comparison to - # def diff(self, element, key_a, key_b, *, subdir=None): - _, a, _ = self.repo.read_commit(buildref(element, key_a)) - _, b, _ = self.repo.read_commit(buildref(element, key_b)) + _, a, _ = self.repo.read_commit(self.get_artifact_fullname(element, key_a)) + _, b, _ = self.repo.read_commit(self.get_artifact_fullname(element, key_b)) if subdir: a = a.get_child(subdir) @@ -309,21 +162,12 @@ class OSTreeCache(ArtifactCache): return modified, removed, added - # pull(): - # - # Pull artifact from one of the configured remote repositories. - # - # Args: - # element (Element): The Element whose artifact is to be fetched - # key (str): The cache key to use - # progress (callable): The progress callback, if any - # def pull(self, element, key, *, progress=None): project = element._get_project() artifact_map = self._artifact_maps[project] - ref = buildref(element, key) + ref = self.get_artifact_fullname(element, key) try: # fetch the artifact from highest priority remote using the specified cache key @@ -334,18 +178,9 @@ class OSTreeCache(ArtifactCache): raise ArtifactError("Failed to pull artifact for element {}: {}" .format(element.name, e)) from e - # link_key(): - # - # Add a key for an existing artifact. - # - # Args: - # element (Element): The Element whose artifact is to be linked - # oldkey (str): An existing cache key for the artifact - # newkey (str): A new cache key for the artifact - # def link_key(self, element, oldkey, newkey): - oldref = buildref(element, oldkey) - newref = buildref(element, newkey) + oldref = self.get_artifact_fullname(element, oldkey) + newref = self.get_artifact_fullname(element, newkey) # resolve ref to checksum rev = _ostree.checksum(self.repo, oldref) @@ -353,19 +188,6 @@ class OSTreeCache(ArtifactCache): # create additional ref for the same checksum _ostree.set_ref(self.repo, newref, rev) - # push(): - # - # Push committed artifact to remote repository. - # - # Args: - # element (Element): The Element whose artifact is to be pushed - # keys (list): The cache keys to use - # - # Returns: - # (bool): True if any remote was updated, False if no pushes were required - # - # Raises: - # (ArtifactError): if there was an error def push(self, element, keys): any_pushed = False @@ -376,9 +198,9 @@ class OSTreeCache(ArtifactCache): if not push_remotes: raise ArtifactError("Push is not enabled for any of the configured remote artifact caches.") - refs = [buildref(element, key) for key in keys] + refs = [self.get_artifact_fullname(element, key) for key in keys] - remotes_for_each_ref = [self.remotes_containing_key(element, ref) for ref in refs] + remotes_for_each_ref = [self._remotes_containing_key(element, ref) for ref in refs] for remote in push_remotes: # Push if the remote is missing any of the refs @@ -387,61 +209,6 @@ class OSTreeCache(ArtifactCache): return any_pushed - # _initialize_remote(): - # - # Do protocol-specific initialization necessary to use a given OSTree - # remote. - # - # The SSH protocol that we use only supports pushing so initializing these - # involves contacting the remote to find out the corresponding pull URL. - # - # Args: - # url (str): URL of the remote - # - # Returns: - # (str, str): the pull URL and push URL for the remote - # - # Raises: - # ArtifactError: if there was an error - def _initialize_remote(self, url): - if url.startswith('ssh://'): - try: - push_url = url - pull_url = initialize_push_connection(url) - except PushException as e: - raise ArtifactError(e) from e - elif url.startswith('/'): - push_url = pull_url = 'file://' + url - elif url.startswith('file://'): - push_url = pull_url = url - elif url.startswith('http://') or url.startswith('https://'): - push_url = None - pull_url = url - else: - raise ArtifactError("Unsupported URL: {}".format(url)) - - return push_url, pull_url - - # _ensure_remote(): - # - # Ensure that our OSTree repo has a remote configured for the given URL. - # Note that SSH access to remotes is not handled by libostree itself. - # - # Args: - # repo (OSTree.Repo): an OSTree repository - # pull_url (str): the URL where libostree can pull from the remote - # - # Returns: - # (str): the name of the remote, which can be passed to various other - # operations implemented by the _ostree module. - # - # Raises: - # OSTreeError: if there was a problem reported by libostree - def _ensure_remote(self, repo, pull_url): - remote_name = utils.url_directory_name(pull_url) - _ostree.configure_remote(repo, remote_name, pull_url) - return remote_name - def initialize_remotes(self, *, on_failure=None): remote_specs = self.global_remote_specs @@ -536,6 +303,84 @@ class OSTreeCache(ArtifactCache): self._artifact_maps[project] = artifact_map self._remotes[project] = remotes + ################################################ + # Local Private Methods # + ################################################ + + # _initialize_remote(): + # + # Do protocol-specific initialization necessary to use a given OSTree + # remote. + # + # The SSH protocol that we use only supports pushing so initializing these + # involves contacting the remote to find out the corresponding pull URL. + # + # Args: + # url (str): URL of the remote + # + # Returns: + # (str, str): the pull URL and push URL for the remote + # + # Raises: + # ArtifactError: if there was an error + def _initialize_remote(self, url): + if url.startswith('ssh://'): + try: + push_url = url + pull_url = initialize_push_connection(url) + except PushException as e: + raise ArtifactError(e) from e + elif url.startswith('/'): + push_url = pull_url = 'file://' + url + elif url.startswith('file://'): + push_url = pull_url = url + elif url.startswith('http://') or url.startswith('https://'): + push_url = None + pull_url = url + else: + raise ArtifactError("Unsupported URL: {}".format(url)) + + return push_url, pull_url + + # _ensure_remote(): + # + # Ensure that our OSTree repo has a remote configured for the given URL. + # Note that SSH access to remotes is not handled by libostree itself. + # + # Args: + # repo (OSTree.Repo): an OSTree repository + # pull_url (str): the URL where libostree can pull from the remote + # + # Returns: + # (str): the name of the remote, which can be passed to various other + # operations implemented by the _ostree module. + # + # Raises: + # OSTreeError: if there was a problem reported by libostree + def _ensure_remote(self, repo, pull_url): + remote_name = utils.url_directory_name(pull_url) + _ostree.configure_remote(repo, remote_name, pull_url) + return remote_name + + # _remotes_containing_key(): + # + # Return every remote cache that contains the key. The result will be an + # ordered list of remotes. + # + # Args: + # element (Element): The Element to check + # key (str): The key to use + # + # Returns (list): A list of _OSTreeRemote instances. + # + def _remotes_containing_key(self, element, key): + if not self._has_fetch_remotes: + return [] + + artifact_map = self._artifact_maps[element._get_project()] + ref = self.get_artifact_fullname(element, key) + return artifact_map.lookup(ref) + def _push_to_remote(self, remote, element, refs): with utils._tempdir(dir=self.context.artifactdir, prefix='push-repo-') as temp_repo_dir: @@ -558,3 +403,34 @@ class OSTreeCache(ArtifactCache): raise ArtifactError("Failed to push artifact {}: {}".format(refs, e)) from e return pushed + + +# Represents a single remote OSTree cache. +# +class _OSTreeRemote(): + def __init__(self, spec, pull_url, push_url): + self.spec = spec + self.pull_url = pull_url + self.push_url = push_url + + +# Maps artifacts to the remotes that contain them. +# +class _OSTreeArtifactMap(): + def __init__(self): + self._ref_to_remotes = {} + + def append(self, ref, remote): + if ref in self._ref_to_remotes: + self._ref_to_remotes[ref].append(remote) + else: + self._ref_to_remotes[ref] = [remote] + + def lookup(self, ref): + return self._ref_to_remotes.get(ref, []) + + def lookup_first(self, ref): + return self._ref_to_remotes.get(ref, [])[0] + + def contains(self, ref): + return ref in self._ref_to_remotes diff --git a/buildstream/_artifactcache/tarcache.py b/buildstream/_artifactcache/tarcache.py index ea01d378d..a53b22162 100644 --- a/buildstream/_artifactcache/tarcache.py +++ b/buildstream/_artifactcache/tarcache.py @@ -29,23 +29,87 @@ from .._exceptions import ArtifactError from . import ArtifactCache -def buildref(element, key): - project = element._get_project() +class TarCache(ArtifactCache): + + def __init__(self, context): + + super().__init__(context) + + self.tardir = os.path.join(context.artifactdir, 'tar') + os.makedirs(self.tardir, exist_ok=True) + + ################################################ + # Implementation of abstract methods # + ################################################ + def contains(self, element, key): + path = os.path.join(self.tardir, _tarpath(element, key)) + return os.path.isfile(path) + + def commit(self, element, content, keys): + os.makedirs(os.path.join(self.tardir, element._get_project().name, element.normal_name), exist_ok=True) + + with utils._tempdir() as temp: + for key in keys: + ref = _tarpath(element, key) + + refdir = os.path.join(temp, key) + shutil.copytree(content, refdir, symlinks=True) + + _Tar.archive(os.path.join(self.tardir, ref), key, temp) + + def extract(self, element, key): + + fullname = self.get_artifact_fullname(element, key) + path = _tarpath(element, key) + + if not os.path.isfile(os.path.join(self.tardir, path)): + raise ArtifactError("Artifact missing for {}".format(fullname)) - # Normalize ostree ref unsupported chars - element_name = element.normal_name.replace('+', 'X') + # If the destination already exists, the artifact has been extracted + dest = os.path.join(self.extractdir, fullname) + if os.path.isdir(dest): + return dest - # assume project and element names are not allowed to contain slashes - return '{0}/{1}/{2}'.format(project.name, element_name, key) + os.makedirs(self.extractdir, exist_ok=True) + with utils._tempdir(dir=self.extractdir) as tmpdir: + _Tar.extract(os.path.join(self.tardir, path), tmpdir) -def tarpath(element, key): + os.makedirs(os.path.join(self.extractdir, element._get_project().name, element.normal_name), + exist_ok=True) + try: + os.rename(os.path.join(tmpdir, key), dest) + except OSError as e: + # With rename, it's possible to get either ENOTEMPTY or EEXIST + # in the case that the destination path is a not empty directory. + # + # If rename fails with these errors, another process beat + # us to it so just ignore. + if e.errno not in [os.errno.ENOTEMPTY, os.errno.EEXIST]: + raise ArtifactError("Failed to extract artifact '{}': {}" + .format(fullname, e)) from e + + return dest + + +# _tarpath() +# +# Generate a relative tarball path for a given element and it's cache key +# +# Args: +# element (Element): The Element object +# key (str): The element's cache key +# +# Returns: +# (str): The relative path to use for storing tarballs +# +def _tarpath(element, key): project = element._get_project() return os.path.join(project.name, element.normal_name, key + '.tar.bz2') # A helper class that contains tar archive/extract functions -class Tar(): +class _Tar(): # archive() # @@ -232,75 +296,3 @@ class Tar(): def _extract_with_python(cls, location, dest): with tarfile.open(location) as tar: tar.extractall(path=dest) - - -class TarCache(ArtifactCache): - - def __init__(self, context): - - super().__init__(context) - - self.tardir = os.path.join(context.artifactdir, 'tar') - os.makedirs(self.tardir, exist_ok=True) - - # contains() - # - # Implements artifactcache.contains(). - # - def contains(self, element, key): - path = os.path.join(self.tardir, tarpath(element, key)) - return os.path.isfile(path) - - # commit() - # - # Implements artifactcache.commit(). - # - def commit(self, element, content, keys): - os.makedirs(os.path.join(self.tardir, element._get_project().name, element.normal_name), exist_ok=True) - - with utils._tempdir() as temp: - for key in keys: - ref = tarpath(element, key) - - refdir = os.path.join(temp, key) - shutil.copytree(content, refdir, symlinks=True) - - Tar.archive(os.path.join(self.tardir, ref), key, temp) - - # extract() - # - # Implements artifactcache.extract(). - # - def extract(self, element, key): - - ref = buildref(element, key) - path = tarpath(element, key) - - if not os.path.isfile(os.path.join(self.tardir, path)): - raise ArtifactError("Artifact missing for {}".format(ref)) - - # If the destination already exists, the artifact has been extracted - dest = os.path.join(self.extractdir, ref) - if os.path.isdir(dest): - return dest - - os.makedirs(self.extractdir, exist_ok=True) - - with utils._tempdir(dir=self.extractdir) as tmpdir: - Tar.extract(os.path.join(self.tardir, path), tmpdir) - - os.makedirs(os.path.join(self.extractdir, element._get_project().name, element.normal_name), - exist_ok=True) - try: - os.rename(os.path.join(tmpdir, key), dest) - except OSError as e: - # With rename, it's possible to get either ENOTEMPTY or EEXIST - # in the case that the destination path is a not empty directory. - # - # If rename fails with these errors, another process beat - # us to it so just ignore. - if e.errno not in [os.errno.ENOTEMPTY, os.errno.EEXIST]: - raise ArtifactError("Failed to extract artifact for ref '{}': {}" - .format(ref, e)) from e - - return dest diff --git a/tests/artifactcache/tar.py b/tests/artifactcache/tar.py index 4a2f5658a..ef39be31c 100644 --- a/tests/artifactcache/tar.py +++ b/tests/artifactcache/tar.py @@ -5,7 +5,7 @@ from contextlib import ExitStack import pytest -from buildstream._artifactcache.tarcache import Tar +from buildstream._artifactcache.tarcache import _Tar from buildstream import utils, ProgramNotFoundError @@ -19,7 +19,7 @@ def test_archive_default(): test_file = stack.enter_context(open(os.path.join(src, 'test'), 'a')) test_file.write('Test') - Tar.archive(os.path.join(tar_dir, 'test.tar'), '.', src) + _Tar.archive(os.path.join(tar_dir, 'test.tar'), '.', src) with tarfile.open(os.path.join(tar_dir, 'test.tar')) as tar: tar.extractall(path=scratch) @@ -58,7 +58,7 @@ def test_extract_default(): with tarfile.open(os.path.join(tar_dir, 'test.tar'), 'a:') as tar: tar.add(src, 'contents') - Tar.extract(os.path.join(tar_dir, 'test.tar'), scratch) + _Tar.extract(os.path.join(tar_dir, 'test.tar'), scratch) assert os.listdir(os.path.join(scratch, 'contents')) == os.listdir(src) |