diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2019-06-24 11:29:22 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2019-06-24 11:29:22 +0000 |
commit | 5c8642b4c86fed13b0b29aaa01e21871d3a89a31 (patch) | |
tree | 5aac4491cec29a17a7a083340b86ea4bb6e843b3 | |
parent | 2d57e14bdd8ac83ddf054ee25c1b97ceb2a2bfaf (diff) | |
parent | 3255fe9d6b5facc0096e2585a157719898272e75 (diff) | |
download | buildstream-5c8642b4c86fed13b0b29aaa01e21871d3a89a31.tar.gz |
Merge branch 'raoul/1044-blobs-on-demand' into 'master'
CLI options for on demand blob fetching
Closes #1044
See merge request BuildStream/buildstream!1400
-rw-r--r-- | src/buildstream/_artifactcache.py | 9 | ||||
-rw-r--r-- | src/buildstream/_cas/cascache.py | 4 | ||||
-rw-r--r-- | src/buildstream/_frontend/cli.py | 35 | ||||
-rw-r--r-- | src/buildstream/_stream.py | 40 | ||||
-rw-r--r-- | tests/frontend/buildcheckout.py | 64 | ||||
-rw-r--r-- | tests/integration/shell.py | 50 | ||||
-rw-r--r-- | tests/integration/shellbuildtrees.py | 29 | ||||
-rw-r--r-- | tests/remoteexecution/partial.py | 25 |
8 files changed, 229 insertions(+), 27 deletions(-)
diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py index de17ea7ac..4a502064f 100644 --- a/src/buildstream/_artifactcache.py +++ b/src/buildstream/_artifactcache.py @@ -469,15 +469,18 @@ class ArtifactCache(BaseCache): push_remotes = [r for r in self._remotes[project] if r.spec.push] - remote_missing_blobs_set = set() + remote_missing_blobs_list = [] for remote in push_remotes: remote.init() remote_missing_blobs = self.cas.remote_missing_blobs(remote, missing_blobs) - remote_missing_blobs_set.update(remote_missing_blobs) - return list(remote_missing_blobs_set) + for blob in remote_missing_blobs: + if blob not in remote_missing_blobs_list: + remote_missing_blobs_list.append(blob) + + return remote_missing_blobs_list ################################################ # Local Private Methods # diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py index 58527d4cb..434e71663 100644 --- a/src/buildstream/_cas/cascache.py +++ b/src/buildstream/_cas/cascache.py @@ -596,14 +596,14 @@ class CASCache(): # Determine which blobs are missing on the remote. 
# # Args: - # blobs (Digest): The directory digest + # blobs ([Digest]): List of directory digests to check # # Returns: List of missing Digest objects # def remote_missing_blobs(self, remote, blobs): missing_blobs = dict() # Limit size of FindMissingBlobs request - for required_blobs_group in _grouper(blobs, 512): + for required_blobs_group in _grouper(iter(blobs), 512): request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name) for required_digest in required_blobs_group: diff --git a/src/buildstream/_frontend/cli.py b/src/buildstream/_frontend/cli.py index 2301fcb78..53adb188c 100644 --- a/src/buildstream/_frontend/cli.py +++ b/src/buildstream/_frontend/cli.py @@ -501,11 +501,13 @@ def show(app, elements, deps, except_, order, format_): @click.option('--use-buildtree', '-t', 'cli_buildtree', type=click.Choice(['ask', 'try', 'always', 'never']), default='ask', help='Defaults to ask but if set to always the function will fail if a build tree is not available') +@click.option('--pull', 'pull_', is_flag=True, default=False, + help='Attempt to pull missing or incomplete artifacts') @click.argument('element', required=False, type=click.Path(readable=False)) @click.argument('command', type=click.STRING, nargs=-1) @click.pass_obj -def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command): +def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, pull_, command): """Run a command in the target element's sandbox environment When this command is executed from a workspace directory, the default @@ -535,11 +537,10 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command) from .._project import HostMount from .._pipeline import PipelineSelection - if build_: - scope = Scope.BUILD - else: - scope = Scope.RUN + scope = Scope.BUILD if build_ else Scope.RUN + # We may need to fetch dependency artifacts if we're pulling the artifact + selection = PipelineSelection.ALL if pull_ else 
PipelineSelection.NONE use_buildtree = None with app.initialized(): @@ -548,9 +549,14 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command) if not element: raise AppError('Missing argument "ELEMENT".') - dependencies = app.stream.load_selection((element,), selection=PipelineSelection.NONE, - use_artifact_config=True) - element = dependencies[0] + elements = app.stream.load_selection((element,), selection=selection, + use_artifact_config=True) + + # last one will be the element we want to stage, previous ones are + # elements to try and pull + element = elements[-1] + pull_dependencies = elements[:-1] if pull_ else None + prompt = app.shell_prompt(element) mounts = [ HostMount(path, host_path) @@ -561,7 +567,7 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command) buildtree_exists = element._buildtree_exists() if cli_buildtree in ("always", "try"): - if buildtree_exists: + if buildtree_exists or pull_: use_buildtree = cli_buildtree if not cached and use_buildtree == "always": click.echo("WARNING: buildtree is not cached locally, will attempt to pull from available remotes", @@ -601,7 +607,8 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command) mounts=mounts, isolate=isolate, command=command, - usebuildtree=use_buildtree) + usebuildtree=use_buildtree, + pull_dependencies=pull_dependencies) except BstError as e: raise AppError("Error launching shell: {}".format(e), detail=e.detail) from e @@ -950,13 +957,16 @@ def artifact(): help="Create a tarball from the artifact contents instead " "of a file tree. 
If LOCATION is '-', the tarball " "will be dumped to the standard output.") +@click.option('--pull', 'pull_', default=False, is_flag=True, + help="Whether to pull the artifact if it's missing or " + "incomplete.") @click.option('--directory', default=None, type=click.Path(file_okay=False), help="The directory to checkout the artifact to") @click.argument('element', required=False, type=click.Path(readable=False)) @click.pass_obj -def artifact_checkout(app, force, deps, integrate, hardlinks, tar, directory, element): +def artifact_checkout(app, force, deps, integrate, hardlinks, tar, pull_, directory, element): """Checkout contents of an artifact When this command is executed from a workspace directory, the default @@ -1002,7 +1012,8 @@ def artifact_checkout(app, force, deps, integrate, hardlinks, tar, directory, el scope=scope, integrate=True if integrate is None else integrate, hardlinks=hardlinks, - tar=tar) + tar=tar, + pull=pull_) ################################################################ diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py index 0606c906a..8097f451d 100644 --- a/src/buildstream/_stream.py +++ b/src/buildstream/_stream.py @@ -140,6 +140,7 @@ class Stream(): # isolate (bool): Whether to isolate the environment like we do in builds # command (list): An argv to launch in the sandbox, or None # usebuildtree (str): Whether to use a buildtree as the source, given cli option + # pull_dependencies ([Element]|None): Elements to attempt to pull # # Returns: # (int): The exit code of the launched shell @@ -149,20 +150,27 @@ class Stream(): mounts=None, isolate=False, command=None, - usebuildtree=None): + usebuildtree=None, + pull_dependencies=None): # Assert we have everything we need built, unless the directory is specified # in which case we just blindly trust the directory, using the element # definitions to control the execution environment only. 
if directory is None: missing_deps = [ - dep._get_full_name() - for dep in self._pipeline.dependencies([element], scope) + dep for dep in self._pipeline.dependencies([element], scope) if not dep._cached() ] if missing_deps: - raise StreamError("Elements need to be built or downloaded before staging a shell environment", - detail="\n".join(missing_deps)) + if not pull_dependencies: + raise StreamError( + "Elements need to be built or downloaded before staging a shell environment", + detail="\n" + .join(list(map(lambda x: x._get_full_name(), missing_deps)))) + self._message(MessageType.INFO, "Attempting to fetch missing or incomplete artifacts") + self._add_queue(PullQueue(self._scheduler)) + self._enqueue_plan([element] + missing_deps) + self._run() buildtree = False # Check if we require a pull queue attempt, with given artifact state and context @@ -474,6 +482,8 @@ class Stream(): # will be placed at the given location. If true and # location is '-', the tarball will be dumped on the # standard output. + # pull (bool): If true will attempt to pull any missing or incomplete + # artifacts. 
# def checkout(self, target, *, location=None, @@ -481,14 +491,26 @@ class Stream(): scope=Scope.RUN, integrate=True, hardlinks=False, - tar=False): + tar=False, + pull=False): - # We only have one target in a checkout command - elements, _ = self._load((target,), (), fetch_subprojects=True) - target = elements[0] + # if pulling we need to ensure dependency artifacts are also pulled + selection = PipelineSelection.RUN if pull else PipelineSelection.NONE + elements, _ = self._load( + (target,), (), selection=selection, + fetch_subprojects=True, use_artifact_config=True) + + target = elements[-1] self._check_location_writable(location, force=force, tar=tar) + uncached_elts = [elt for elt in elements if not elt._cached()] + if uncached_elts and pull: + self._message(MessageType.INFO, "Attempting to fetch missing or incomplete artifact") + self._add_queue(PullQueue(self._scheduler)) + self._enqueue_plan(uncached_elts) + self._run() + # Stage deps into a temporary sandbox first try: with target._prepare_sandbox(scope=scope, directory=None, diff --git a/tests/frontend/buildcheckout.py b/tests/frontend/buildcheckout.py index 556bf811c..97bce91a7 100644 --- a/tests/frontend/buildcheckout.py +++ b/tests/frontend/buildcheckout.py @@ -5,6 +5,7 @@ import os import tarfile import hashlib import subprocess +import re import pytest @@ -12,8 +13,9 @@ from buildstream.testing import cli # pylint: disable=unused-import from buildstream.testing._utils.site import IS_WINDOWS from buildstream import _yaml from buildstream._exceptions import ErrorDomain, LoadErrorReason +from buildstream import utils -from tests.testutils import generate_junction, yaml_file_get_provenance +from tests.testutils import generate_junction, yaml_file_get_provenance, create_artifact_share from . import configure_project @@ -823,3 +825,63 @@ def test_build_junction_transitive_short_notation_with_junction(cli, tmpdir, dat # cross-junction elements is not allowed. 
result = cli.run(project=project, args=['build', 'junction-dep.bst']) result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA) + + +# Should check that after a build we have partial artifacts locally, but should +# then attempt to fetch them when doing a artifact checkout +@pytest.mark.datafiles(DATA_DIR) +def test_partial_artifact_checkout_fetch(cli, datafiles, tmpdir): + project = str(datafiles) + build_elt = 'import-bin.bst' + checkout_dir = os.path.join(str(tmpdir), 'checkout') + + with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share: + + cli.configure({'artifacts': { + 'url': share.repo, + 'push': True + }}) + + result = cli.run(project=project, args=['build', build_elt]) + result.assert_success() + + # A push artifact cache means we have to pull to push to them, so + # delete some blobs from that CAS such that we have to fetch + digest = utils.sha256sum(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello')) + objpath = os.path.join(cli.directory, 'cas', 'objects', digest[:2], digest[2:]) + os.unlink(objpath) + + # Verify that the build-only dependency is not (complete) in the local cache + result = cli.run(project=project, args=[ + 'artifact', 'checkout', build_elt, + '--directory', checkout_dir]) + result.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt') + + # Verify that the pull method fetches relevant artifacts in order to stage + result = cli.run(project=project, args=[ + 'artifact', 'checkout', '--pull', build_elt, + '--directory', checkout_dir]) + result.assert_success() + + # should have pulled whatever was deleted previous + assert 'import-bin.bst' in result.get_pulled_elements() + + +@pytest.mark.datafiles(DATA_DIR) +def test_partial_checkout_fail(tmpdir, datafiles, cli): + project = str(datafiles) + build_elt = 'import-bin.bst' + checkout_dir = os.path.join(str(tmpdir), 'checkout') + + with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share: + + 
cli.configure({'artifacts': { + 'url': share.repo, + 'push': True + }}) + + res = cli.run(project=project, args=[ + 'artifact', 'checkout', '--pull', build_elt, '--directory', + checkout_dir]) + res.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt') + assert re.findall(r'Remote \((\S+)\) does not have artifact (\S+) cached', res.stderr) diff --git a/tests/integration/shell.py b/tests/integration/shell.py index d0c9f1f99..f7de3e462 100644 --- a/tests/integration/shell.py +++ b/tests/integration/shell.py @@ -7,6 +7,10 @@ import pytest from buildstream import _yaml from buildstream.testing import cli_integration as cli # pylint: disable=unused-import from buildstream.testing._utils.site import HAVE_SANDBOX +from buildstream._exceptions import ErrorDomain +from buildstream import utils + +from tests.testutils import create_artifact_share pytestmark = pytest.mark.integration @@ -391,3 +395,49 @@ def test_integration_external_workspace(cli, tmpdir_factory, datafiles, build_sh command.extend([element_name, '--', 'true']) result = cli.run(project=project, cwd=workspace_dir, args=command) result.assert_success() + + +@pytest.mark.datafiles(DATA_DIR) +@pytest.mark.skipif(not HAVE_SANDBOX, reason='Only available with a functioning sandbox') +def test_integration_partial_artifact(cli, datafiles, tmpdir, integration_cache): + + project = str(datafiles) + element_name = 'autotools/amhello.bst' + + # push to an artifact server so we can pull from it later. 
+ with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share: + cli.configure({'artifacts': { + 'url': share.repo, + 'push': True + }}) + result = cli.run(project=project, args=['build', element_name]) + result.assert_success() + + # If the build is cached then it might not push to the artifact cache + result = cli.run(project=project, args=['artifact', 'push', element_name]) + result.assert_success() + + result = cli.run(project=project, args=['shell', element_name]) + result.assert_success() + + # do a checkout and get the digest of the hello binary. + result = cli.run(project=project, args=[ + 'artifact', 'checkout', '--deps', 'none', + '--directory', os.path.join(str(tmpdir), 'tmp'), + 'autotools/amhello.bst']) + result.assert_success() + digest = utils.sha256sum(os.path.join(str(tmpdir), 'tmp', 'usr', 'bin', 'hello')) + + # Remove the binary from the CAS + cachedir = cli.config['cachedir'] + objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], digest[2:]) + os.unlink(objpath) + + # check shell doesn't work + result = cli.run(project=project, args=['shell', element_name, '--', 'hello']) + result.assert_main_error(ErrorDomain.APP, None) + + # check the artifact gets completed with '--pull' specified + result = cli.run(project=project, args=['shell', '--pull', element_name, '--', 'hello']) + result.assert_success() + assert 'autotools/amhello.bst' in result.get_pulled_elements() diff --git a/tests/integration/shellbuildtrees.py b/tests/integration/shellbuildtrees.py index b48f4afe7..a1eecb1eb 100644 --- a/tests/integration/shellbuildtrees.py +++ b/tests/integration/shellbuildtrees.py @@ -305,3 +305,32 @@ def test_buildtree_options(cli, tmpdir, datafiles): assert 'Hi' in res.output assert "buildtree is not cached locally, will attempt to pull from available remotes" in res.stderr assert 'Attempting to fetch missing artifact buildtree' in res.stderr + + +# Tests running pull and pull-buildtree options at the same time. 
+@pytest.mark.datafiles(DATA_DIR) +@pytest.mark.skipif(not HAVE_SANDBOX, reason='Only available with a functioning sandbox') +def test_pull_buildtree_pulled(cli, tmpdir, datafiles): + project = str(datafiles) + element_name = 'build-shell/buildtree.bst' + + with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share: + # Build the element to push it to cache + cli.configure({ + 'artifacts': {'url': share.repo, 'push': True} + }) + result = cli.run(project=project, args=['--cache-buildtrees', 'always', 'build', element_name]) + result.assert_success() + assert cli.get_element_state(project, element_name) == 'cached' + + # Discard the cache + shutil.rmtree(str(os.path.join(str(tmpdir), 'cache', 'cas'))) + shutil.rmtree(str(os.path.join(str(tmpdir), 'cache', 'artifacts'))) + assert cli.get_element_state(project, element_name) != 'cached' + + # Check it's using the cached build tree + res = cli.run(project=project, args=[ + '--pull-buildtrees', 'shell', '--build', element_name, '--pull', + '--use-buildtree', 'always', '--', 'cat', 'test' + ]) + res.assert_success() diff --git a/tests/remoteexecution/partial.py b/tests/remoteexecution/partial.py index fd7b11cdb..a684bda18 100644 --- a/tests/remoteexecution/partial.py +++ b/tests/remoteexecution/partial.py @@ -8,6 +8,8 @@ from buildstream._exceptions import ErrorDomain from buildstream.testing import cli_remote_execution as cli # pylint: disable=unused-import from buildstream.testing.integration import assert_contains +from tests.testutils.artifactshare import create_artifact_share + pytestmark = pytest.mark.remoteexecution @@ -44,3 +46,26 @@ def test_build_dependency_partial_local_cas(cli, datafiles): result = cli.run(project=project, args=['artifact', 'checkout', builddep_element_name, '--directory', builddep_checkout]) result.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt') + + +@pytest.mark.datafiles(DATA_DIR) +def test_build_partial_push(cli, tmpdir, datafiles): + project = 
str(datafiles) + share_dir = os.path.join(str(tmpdir), "artifactshare") + element_name = 'no-runtime-deps.bst' + builddep_element_name = 'autotools/amhello.bst' + + with create_artifact_share(share_dir) as share: + + services = cli.ensure_services() + assert set(services) == set(['action-cache', 'execution', 'storage']) + + cli.config['artifacts'] = { + 'url': share.repo, + 'push': True, + } + + res = cli.run(project=project, args=['build', element_name]) + res.assert_success() + + assert builddep_element_name in res.get_pushed_elements() |