summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbst-marge-bot <marge-bot@buildstream.build>2019-06-24 11:29:22 +0000
committerbst-marge-bot <marge-bot@buildstream.build>2019-06-24 11:29:22 +0000
commit5c8642b4c86fed13b0b29aaa01e21871d3a89a31 (patch)
tree5aac4491cec29a17a7a083340b86ea4bb6e843b3
parent2d57e14bdd8ac83ddf054ee25c1b97ceb2a2bfaf (diff)
parent3255fe9d6b5facc0096e2585a157719898272e75 (diff)
downloadbuildstream-5c8642b4c86fed13b0b29aaa01e21871d3a89a31.tar.gz
Merge branch 'raoul/1044-blobs-on-demand' into 'master'
CLI options for on demand blob fetching Closes #1044 See merge request BuildStream/buildstream!1400
-rw-r--r--src/buildstream/_artifactcache.py9
-rw-r--r--src/buildstream/_cas/cascache.py4
-rw-r--r--src/buildstream/_frontend/cli.py35
-rw-r--r--src/buildstream/_stream.py40
-rw-r--r--tests/frontend/buildcheckout.py64
-rw-r--r--tests/integration/shell.py50
-rw-r--r--tests/integration/shellbuildtrees.py29
-rw-r--r--tests/remoteexecution/partial.py25
8 files changed, 229 insertions, 27 deletions
diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index de17ea7ac..4a502064f 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -469,15 +469,18 @@ class ArtifactCache(BaseCache):
push_remotes = [r for r in self._remotes[project] if r.spec.push]
- remote_missing_blobs_set = set()
+ remote_missing_blobs_list = []
for remote in push_remotes:
remote.init()
remote_missing_blobs = self.cas.remote_missing_blobs(remote, missing_blobs)
- remote_missing_blobs_set.update(remote_missing_blobs)
- return list(remote_missing_blobs_set)
+ for blob in remote_missing_blobs:
+ if blob not in remote_missing_blobs_list:
+ remote_missing_blobs_list.append(blob)
+
+ return remote_missing_blobs_list
################################################
# Local Private Methods #
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 58527d4cb..434e71663 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -596,14 +596,14 @@ class CASCache():
# Determine which blobs are missing on the remote.
#
# Args:
- # blobs (Digest): The directory digest
+ # blobs ([Digest]): List of directory digests to check
#
# Returns: List of missing Digest objects
#
def remote_missing_blobs(self, remote, blobs):
missing_blobs = dict()
# Limit size of FindMissingBlobs request
- for required_blobs_group in _grouper(blobs, 512):
+ for required_blobs_group in _grouper(iter(blobs), 512):
request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name)
for required_digest in required_blobs_group:
diff --git a/src/buildstream/_frontend/cli.py b/src/buildstream/_frontend/cli.py
index 2301fcb78..53adb188c 100644
--- a/src/buildstream/_frontend/cli.py
+++ b/src/buildstream/_frontend/cli.py
@@ -501,11 +501,13 @@ def show(app, elements, deps, except_, order, format_):
@click.option('--use-buildtree', '-t', 'cli_buildtree', type=click.Choice(['ask', 'try', 'always', 'never']),
default='ask',
help='Defaults to ask but if set to always the function will fail if a build tree is not available')
+@click.option('--pull', 'pull_', is_flag=True, default=False,
+ help='Attempt to pull missing or incomplete artifacts')
@click.argument('element', required=False,
type=click.Path(readable=False))
@click.argument('command', type=click.STRING, nargs=-1)
@click.pass_obj
-def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command):
+def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, pull_, command):
"""Run a command in the target element's sandbox environment
When this command is executed from a workspace directory, the default
@@ -535,11 +537,10 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command)
from .._project import HostMount
from .._pipeline import PipelineSelection
- if build_:
- scope = Scope.BUILD
- else:
- scope = Scope.RUN
+ scope = Scope.BUILD if build_ else Scope.RUN
+ # We may need to fetch dependency artifacts if we're pulling the artifact
+ selection = PipelineSelection.ALL if pull_ else PipelineSelection.NONE
use_buildtree = None
with app.initialized():
@@ -548,9 +549,14 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command)
if not element:
raise AppError('Missing argument "ELEMENT".')
- dependencies = app.stream.load_selection((element,), selection=PipelineSelection.NONE,
- use_artifact_config=True)
- element = dependencies[0]
+ elements = app.stream.load_selection((element,), selection=selection,
+ use_artifact_config=True)
+
+ # last one will be the element we want to stage, previous ones are
+ # elements to try and pull
+ element = elements[-1]
+ pull_dependencies = elements[:-1] if pull_ else None
+
prompt = app.shell_prompt(element)
mounts = [
HostMount(path, host_path)
@@ -561,7 +567,7 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command)
buildtree_exists = element._buildtree_exists()
if cli_buildtree in ("always", "try"):
- if buildtree_exists:
+ if buildtree_exists or pull_:
use_buildtree = cli_buildtree
if not cached and use_buildtree == "always":
click.echo("WARNING: buildtree is not cached locally, will attempt to pull from available remotes",
@@ -601,7 +607,8 @@ def shell(app, element, sysroot, mount, isolate, build_, cli_buildtree, command)
mounts=mounts,
isolate=isolate,
command=command,
- usebuildtree=use_buildtree)
+ usebuildtree=use_buildtree,
+ pull_dependencies=pull_dependencies)
except BstError as e:
raise AppError("Error launching shell: {}".format(e), detail=e.detail) from e
@@ -950,13 +957,16 @@ def artifact():
help="Create a tarball from the artifact contents instead "
"of a file tree. If LOCATION is '-', the tarball "
"will be dumped to the standard output.")
+@click.option('--pull', 'pull_', default=False, is_flag=True,
+ help="Whether to pull the artifact if it's missing or "
+ "incomplete.")
@click.option('--directory', default=None,
type=click.Path(file_okay=False),
help="The directory to checkout the artifact to")
@click.argument('element', required=False,
type=click.Path(readable=False))
@click.pass_obj
-def artifact_checkout(app, force, deps, integrate, hardlinks, tar, directory, element):
+def artifact_checkout(app, force, deps, integrate, hardlinks, tar, pull_, directory, element):
"""Checkout contents of an artifact
When this command is executed from a workspace directory, the default
@@ -1002,7 +1012,8 @@ def artifact_checkout(app, force, deps, integrate, hardlinks, tar, directory, el
scope=scope,
integrate=True if integrate is None else integrate,
hardlinks=hardlinks,
- tar=tar)
+ tar=tar,
+ pull=pull_)
################################################################
diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py
index 0606c906a..8097f451d 100644
--- a/src/buildstream/_stream.py
+++ b/src/buildstream/_stream.py
@@ -140,6 +140,7 @@ class Stream():
# isolate (bool): Whether to isolate the environment like we do in builds
# command (list): An argv to launch in the sandbox, or None
# usebuildtree (str): Whether to use a buildtree as the source, given cli option
+ # pull_dependencies ([Element]|None): Elements to attempt to pull
#
# Returns:
# (int): The exit code of the launched shell
@@ -149,20 +150,27 @@ class Stream():
mounts=None,
isolate=False,
command=None,
- usebuildtree=None):
+ usebuildtree=None,
+ pull_dependencies=None):
# Assert we have everything we need built, unless the directory is specified
# in which case we just blindly trust the directory, using the element
# definitions to control the execution environment only.
if directory is None:
missing_deps = [
- dep._get_full_name()
- for dep in self._pipeline.dependencies([element], scope)
+ dep for dep in self._pipeline.dependencies([element], scope)
if not dep._cached()
]
if missing_deps:
- raise StreamError("Elements need to be built or downloaded before staging a shell environment",
- detail="\n".join(missing_deps))
+ if not pull_dependencies:
+ raise StreamError(
+ "Elements need to be built or downloaded before staging a shell environment",
+ detail="\n"
+ .join(list(map(lambda x: x._get_full_name(), missing_deps))))
+ self._message(MessageType.INFO, "Attempting to fetch missing or incomplete artifacts")
+ self._add_queue(PullQueue(self._scheduler))
+ self._enqueue_plan([element] + missing_deps)
+ self._run()
buildtree = False
# Check if we require a pull queue attempt, with given artifact state and context
@@ -474,6 +482,8 @@ class Stream():
# will be placed at the given location. If true and
# location is '-', the tarball will be dumped on the
# standard output.
+ # pull (bool): If true will attempt to pull any missing or incomplete
+ # artifacts.
#
def checkout(self, target, *,
location=None,
@@ -481,14 +491,26 @@ class Stream():
scope=Scope.RUN,
integrate=True,
hardlinks=False,
- tar=False):
+ tar=False,
+ pull=False):
- # We only have one target in a checkout command
- elements, _ = self._load((target,), (), fetch_subprojects=True)
- target = elements[0]
+ # if pulling we need to ensure dependency artifacts are also pulled
+ selection = PipelineSelection.RUN if pull else PipelineSelection.NONE
+ elements, _ = self._load(
+ (target,), (), selection=selection,
+ fetch_subprojects=True, use_artifact_config=True)
+
+ target = elements[-1]
self._check_location_writable(location, force=force, tar=tar)
+ uncached_elts = [elt for elt in elements if not elt._cached()]
+ if uncached_elts and pull:
+ self._message(MessageType.INFO, "Attempting to fetch missing or incomplete artifact")
+ self._add_queue(PullQueue(self._scheduler))
+ self._enqueue_plan(uncached_elts)
+ self._run()
+
# Stage deps into a temporary sandbox first
try:
with target._prepare_sandbox(scope=scope, directory=None,
diff --git a/tests/frontend/buildcheckout.py b/tests/frontend/buildcheckout.py
index 556bf811c..97bce91a7 100644
--- a/tests/frontend/buildcheckout.py
+++ b/tests/frontend/buildcheckout.py
@@ -5,6 +5,7 @@ import os
import tarfile
import hashlib
import subprocess
+import re
import pytest
@@ -12,8 +13,9 @@ from buildstream.testing import cli # pylint: disable=unused-import
from buildstream.testing._utils.site import IS_WINDOWS
from buildstream import _yaml
from buildstream._exceptions import ErrorDomain, LoadErrorReason
+from buildstream import utils
-from tests.testutils import generate_junction, yaml_file_get_provenance
+from tests.testutils import generate_junction, yaml_file_get_provenance, create_artifact_share
from . import configure_project
@@ -823,3 +825,63 @@ def test_build_junction_transitive_short_notation_with_junction(cli, tmpdir, dat
# cross-junction elements is not allowed.
result = cli.run(project=project, args=['build', 'junction-dep.bst'])
result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
+
+
+# Should check that after a build we have partial artifacts locally, but should
+# then attempt to fetch them when doing an artifact checkout
+@pytest.mark.datafiles(DATA_DIR)
+def test_partial_artifact_checkout_fetch(cli, datafiles, tmpdir):
+ project = str(datafiles)
+ build_elt = 'import-bin.bst'
+ checkout_dir = os.path.join(str(tmpdir), 'checkout')
+
+ with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share:
+
+ cli.configure({'artifacts': {
+ 'url': share.repo,
+ 'push': True
+ }})
+
+ result = cli.run(project=project, args=['build', build_elt])
+ result.assert_success()
+
+ # A push artifact cache means we have to pull to push to them, so
+ # delete some blobs from that CAS such that we have to fetch
+ digest = utils.sha256sum(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello'))
+ objpath = os.path.join(cli.directory, 'cas', 'objects', digest[:2], digest[2:])
+ os.unlink(objpath)
+
+ # Verify that the build-only dependency is not (complete) in the local cache
+ result = cli.run(project=project, args=[
+ 'artifact', 'checkout', build_elt,
+ '--directory', checkout_dir])
+ result.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt')
+
+ # Verify that the pull method fetches relevant artifacts in order to stage
+ result = cli.run(project=project, args=[
+ 'artifact', 'checkout', '--pull', build_elt,
+ '--directory', checkout_dir])
+ result.assert_success()
+
+ # should have pulled whatever was deleted previously
+ assert 'import-bin.bst' in result.get_pulled_elements()
+
+
+@pytest.mark.datafiles(DATA_DIR)
+def test_partial_checkout_fail(tmpdir, datafiles, cli):
+ project = str(datafiles)
+ build_elt = 'import-bin.bst'
+ checkout_dir = os.path.join(str(tmpdir), 'checkout')
+
+ with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share:
+
+ cli.configure({'artifacts': {
+ 'url': share.repo,
+ 'push': True
+ }})
+
+ res = cli.run(project=project, args=[
+ 'artifact', 'checkout', '--pull', build_elt, '--directory',
+ checkout_dir])
+ res.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt')
+ assert re.findall(r'Remote \((\S+)\) does not have artifact (\S+) cached', res.stderr)
diff --git a/tests/integration/shell.py b/tests/integration/shell.py
index d0c9f1f99..f7de3e462 100644
--- a/tests/integration/shell.py
+++ b/tests/integration/shell.py
@@ -7,6 +7,10 @@ import pytest
from buildstream import _yaml
from buildstream.testing import cli_integration as cli # pylint: disable=unused-import
from buildstream.testing._utils.site import HAVE_SANDBOX
+from buildstream._exceptions import ErrorDomain
+from buildstream import utils
+
+from tests.testutils import create_artifact_share
pytestmark = pytest.mark.integration
@@ -391,3 +395,49 @@ def test_integration_external_workspace(cli, tmpdir_factory, datafiles, build_sh
command.extend([element_name, '--', 'true'])
result = cli.run(project=project, cwd=workspace_dir, args=command)
result.assert_success()
+
+
+@pytest.mark.datafiles(DATA_DIR)
+@pytest.mark.skipif(not HAVE_SANDBOX, reason='Only available with a functioning sandbox')
+def test_integration_partial_artifact(cli, datafiles, tmpdir, integration_cache):
+
+ project = str(datafiles)
+ element_name = 'autotools/amhello.bst'
+
+ # push to an artifact server so we can pull from it later.
+ with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share:
+ cli.configure({'artifacts': {
+ 'url': share.repo,
+ 'push': True
+ }})
+ result = cli.run(project=project, args=['build', element_name])
+ result.assert_success()
+
+ # If the build is cached then it might not push to the artifact cache
+ result = cli.run(project=project, args=['artifact', 'push', element_name])
+ result.assert_success()
+
+ result = cli.run(project=project, args=['shell', element_name])
+ result.assert_success()
+
+ # do a checkout and get the digest of the hello binary.
+ result = cli.run(project=project, args=[
+ 'artifact', 'checkout', '--deps', 'none',
+ '--directory', os.path.join(str(tmpdir), 'tmp'),
+ 'autotools/amhello.bst'])
+ result.assert_success()
+ digest = utils.sha256sum(os.path.join(str(tmpdir), 'tmp', 'usr', 'bin', 'hello'))
+
+ # Remove the binary from the CAS
+ cachedir = cli.config['cachedir']
+ objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], digest[2:])
+ os.unlink(objpath)
+
+ # check shell doesn't work
+ result = cli.run(project=project, args=['shell', element_name, '--', 'hello'])
+ result.assert_main_error(ErrorDomain.APP, None)
+
+ # check the artifact gets completed with '--pull' specified
+ result = cli.run(project=project, args=['shell', '--pull', element_name, '--', 'hello'])
+ result.assert_success()
+ assert 'autotools/amhello.bst' in result.get_pulled_elements()
diff --git a/tests/integration/shellbuildtrees.py b/tests/integration/shellbuildtrees.py
index b48f4afe7..a1eecb1eb 100644
--- a/tests/integration/shellbuildtrees.py
+++ b/tests/integration/shellbuildtrees.py
@@ -305,3 +305,32 @@ def test_buildtree_options(cli, tmpdir, datafiles):
assert 'Hi' in res.output
assert "buildtree is not cached locally, will attempt to pull from available remotes" in res.stderr
assert 'Attempting to fetch missing artifact buildtree' in res.stderr
+
+
+# Tests running pull and pull-buildtree options at the same time.
+@pytest.mark.datafiles(DATA_DIR)
+@pytest.mark.skipif(not HAVE_SANDBOX, reason='Only available with a functioning sandbox')
+def test_pull_buildtree_pulled(cli, tmpdir, datafiles):
+ project = str(datafiles)
+ element_name = 'build-shell/buildtree.bst'
+
+ with create_artifact_share(os.path.join(str(tmpdir), 'artifactshare')) as share:
+ # Build the element to push it to cache
+ cli.configure({
+ 'artifacts': {'url': share.repo, 'push': True}
+ })
+ result = cli.run(project=project, args=['--cache-buildtrees', 'always', 'build', element_name])
+ result.assert_success()
+ assert cli.get_element_state(project, element_name) == 'cached'
+
+ # Discard the cache
+ shutil.rmtree(str(os.path.join(str(tmpdir), 'cache', 'cas')))
+ shutil.rmtree(str(os.path.join(str(tmpdir), 'cache', 'artifacts')))
+ assert cli.get_element_state(project, element_name) != 'cached'
+
+ # Check it's using the cached build tree
+ res = cli.run(project=project, args=[
+ '--pull-buildtrees', 'shell', '--build', element_name, '--pull',
+ '--use-buildtree', 'always', '--', 'cat', 'test'
+ ])
+ res.assert_success()
diff --git a/tests/remoteexecution/partial.py b/tests/remoteexecution/partial.py
index fd7b11cdb..a684bda18 100644
--- a/tests/remoteexecution/partial.py
+++ b/tests/remoteexecution/partial.py
@@ -8,6 +8,8 @@ from buildstream._exceptions import ErrorDomain
from buildstream.testing import cli_remote_execution as cli # pylint: disable=unused-import
from buildstream.testing.integration import assert_contains
+from tests.testutils.artifactshare import create_artifact_share
+
pytestmark = pytest.mark.remoteexecution
@@ -44,3 +46,26 @@ def test_build_dependency_partial_local_cas(cli, datafiles):
result = cli.run(project=project, args=['artifact', 'checkout', builddep_element_name,
'--directory', builddep_checkout])
result.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt')
+
+
+@pytest.mark.datafiles(DATA_DIR)
+def test_build_partial_push(cli, tmpdir, datafiles):
+ project = str(datafiles)
+ share_dir = os.path.join(str(tmpdir), "artifactshare")
+ element_name = 'no-runtime-deps.bst'
+ builddep_element_name = 'autotools/amhello.bst'
+
+ with create_artifact_share(share_dir) as share:
+
+ services = cli.ensure_services()
+ assert set(services) == set(['action-cache', 'execution', 'storage'])
+
+ cli.config['artifacts'] = {
+ 'url': share.repo,
+ 'push': True,
+ }
+
+ res = cli.run(project=project, args=['build', element_name])
+ res.assert_success()
+
+ assert builddep_element_name in res.get_pushed_elements()