diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2019-10-10 10:50:38 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2019-10-10 10:50:38 +0000 |
commit | 28803a91bb0525a197de004dce5fbe750a00fbde (patch) | |
tree | 97df3d122ac5b9e82b98bbab71a743614223f998 | |
parent | e31b22a39e3f127685be50cd5a56217a8c11b07d (diff) | |
parent | 276cfa63b5aec4760b171b0c4a60962b2a9ba819 (diff) | |
download | buildstream-28803a91bb0525a197de004dce5fbe750a00fbde.tar.gz |
Merge branch 'bschubert/partial-source-cache' into 'master'
Gracefully fallback to fetching source if remote doesn't have every blob cached
See merge request BuildStream/buildstream!1635
-rw-r--r-- | src/buildstream/_sourcecache.py | 5 | ||||
-rw-r--r-- | tests/sourcecache/fetch.py | 201 |
2 files changed, 111 insertions, 95 deletions
diff --git a/src/buildstream/_sourcecache.py b/src/buildstream/_sourcecache.py index 03ba9a74c..b987d9b8d 100644 --- a/src/buildstream/_sourcecache.py +++ b/src/buildstream/_sourcecache.py @@ -21,6 +21,7 @@ import os import grpc from ._remote import BaseRemote +from ._cas.casremote import BlobNotFound from .storage._casbaseddirectory import CasBasedDirectory from ._basecache import BaseCache from ._exceptions import CASError, CASRemoteError, SourceCacheError @@ -259,6 +260,10 @@ class SourceCache(BaseCache): source.info("Pulled source {} <- {}".format(display_key, remote)) return True + except BlobNotFound as e: + # Not all blobs are available on this remote + source.info("Remote cas ({}) does not have blob {} cached".format(remote, e.blob)) + continue except CASError as e: raise SourceCacheError("Failed to pull source {}: {}".format( display_key, e)) from e diff --git a/tests/sourcecache/fetch.py b/tests/sourcecache/fetch.py index 99c00f120..a5863b867 100644 --- a/tests/sourcecache/fetch.py +++ b/tests/sourcecache/fetch.py @@ -19,6 +19,7 @@ # # Pylint doesn't play well with fixtures and dependency injection from pytest # pylint: disable=redefined-outer-name +from contextlib import contextmanager import os import shutil import pytest @@ -34,42 +35,60 @@ from tests.testutils import create_artifact_share, dummy_context DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "project") +def move_local_cas_to_remote_source_share(local, remote): + shutil.rmtree(os.path.join(remote, 'repo', 'cas')) + shutil.move(os.path.join(local, 'source_protos'), os.path.join(remote, 'repo')) + shutil.move(os.path.join(local, 'cas'), os.path.join(remote, 'repo')) + shutil.rmtree(os.path.join(local, 'sources')) + shutil.rmtree(os.path.join(local, 'artifacts')) + + +def create_test_element(tmpdir, project_dir): + repo = create_repo('git', str(tmpdir)) + ref = repo.create(os.path.join(project_dir, 'files')) + element_path = os.path.join(project_dir, 'elements') + element_name = 'fetch.bst' + element = { + 'kind': 'import', + 'sources': [repo.source_config(ref=ref)] + } + _yaml.roundtrip_dump(element, os.path.join(element_path, element_name)) + + return element_name, repo, ref + + +@contextmanager +def context_with_source_cache(cli, cache, share, tmpdir): + user_config_file = str(tmpdir.join('buildstream.conf')) + user_config = { + 'scheduler': { + 'pushers': 1 + }, + 'source-caches': { + 'url': share.repo, + }, + 'cachedir': cache, + } + _yaml.roundtrip_dump(user_config, file=user_config_file) + cli.configure(user_config) + + with dummy_context(config=user_config_file) as context: + yield context + + @pytest.mark.datafiles(DATA_DIR) def test_source_fetch(cli, tmpdir, datafiles): project_dir = str(datafiles) + element_name, _repo, _ref = create_test_element(tmpdir, project_dir) + cache_dir = os.path.join(str(tmpdir), 'cache') # use artifact cache for sources for now, they should work the same with create_artifact_share(os.path.join(str(tmpdir), 'sourceshare')) as share: - # configure using this share - cache_dir = os.path.join(str(tmpdir), 'cache') - user_config_file = str(tmpdir.join('buildstream.conf')) - user_config = { - 'scheduler': { - 'pushers': 1 - }, - 'source-caches': { - 'url': share.repo, - }, - 'cachedir': cache_dir, - } - _yaml.roundtrip_dump(user_config, file=user_config_file) - cli.configure(user_config) - - repo = create_repo('git', str(tmpdir)) - ref = repo.create(os.path.join(project_dir, 'files')) - element_path = os.path.join(project_dir, 'elements') - element_name = 'fetch.bst' - element = { - 'kind': 'import', - 'sources': [repo.source_config(ref=ref)] - } - _yaml.roundtrip_dump(element, os.path.join(element_path, element_name)) - - with dummy_context(config=user_config_file) as context: + with context_with_source_cache(cli, cache_dir, share, tmpdir) as context: project = Project(project_dir, context) project.ensure_fully_loaded() - element = project.load_elements(['fetch.bst'])[0] + element = project.load_elements([element_name])[0] assert not element._source_cached() source = list(element.sources())[0] @@ -77,7 +96,7 @@ def test_source_fetch(cli, tmpdir, datafiles): assert not cas.contains(source._get_source_name()) # Just check that we sensibly fetch and build the element - res = cli.run(project=project_dir, args=['build', 'fetch.bst']) + res = cli.run(project=project_dir, args=['build', element_name]) res.assert_success() assert os.listdir(os.path.join(str(tmpdir), 'cache', 'sources', 'git')) != [] @@ -86,25 +105,16 @@ def test_source_fetch(cli, tmpdir, datafiles): sourcecache = context.sourcecache digest = sourcecache.export(source)._get_digest() - # Move source in local cas to repo - shutil.rmtree(os.path.join(str(tmpdir), 'sourceshare', 'repo', 'cas')) - shutil.move( - os.path.join(str(tmpdir), 'cache', 'source_protos'), - os.path.join(str(tmpdir), 'sourceshare', 'repo')) - shutil.move( - os.path.join(str(tmpdir), 'cache', 'cas'), - os.path.join(str(tmpdir), 'sourceshare', 'repo')) - shutil.rmtree(os.path.join(str(tmpdir), 'cache', 'sources')) - shutil.rmtree(os.path.join(str(tmpdir), 'cache', 'artifacts')) + move_local_cas_to_remote_source_share(str(cache_dir), share.directory) # check the share has the object assert share.has_object(digest) - state = cli.get_element_state(project_dir, 'fetch.bst') + state = cli.get_element_state(project_dir, element_name) assert state == 'fetch needed' # Now fetch the source and check - res = cli.run(project=project_dir, args=['source', 'fetch', 'fetch.bst']) + res = cli.run(project=project_dir, args=['source', 'fetch', element_name]) res.assert_success() assert "Pulled source" in res.stderr @@ -116,39 +126,16 @@ def test_source_fetch(cli, tmpdir, datafiles): @pytest.mark.datafiles(DATA_DIR) def test_fetch_fallback(cli, tmpdir, datafiles): project_dir = str(datafiles) + element_name, repo, ref = create_test_element(tmpdir, project_dir) + cache_dir = os.path.join(str(tmpdir), 'cache') # use artifact cache for sources for now, they should work the same with create_artifact_share(os.path.join(str(tmpdir), 'sourceshare')) as share: - # configure using this share - cache_dir = os.path.join(str(tmpdir), 'cache') - user_config_file = str(tmpdir.join('buildstream.conf')) - user_config = { - 'scheduler': { - 'pushers': 1 - }, - 'source-caches': { - 'url': share.repo, - }, - 'cachedir': cache_dir, - } - _yaml.roundtrip_dump(user_config, file=user_config_file) - cli.configure(user_config) - - repo = create_repo('git', str(tmpdir)) - ref = repo.create(os.path.join(project_dir, 'files')) - element_path = os.path.join(project_dir, 'elements') - element_name = 'fetch.bst' - element = { - 'kind': 'import', - 'sources': [repo.source_config(ref=ref)] - } - _yaml.roundtrip_dump(element, os.path.join(element_path, element_name)) - - with dummy_context(config=user_config_file) as context: + with context_with_source_cache(cli, cache_dir, share, tmpdir) as context: project = Project(project_dir, context) project.ensure_fully_loaded() - element = project.load_elements(['fetch.bst'])[0] + element = project.load_elements([element_name])[0] assert not element._source_cached() source = list(element.sources())[0] @@ -157,7 +144,7 @@ def test_fetch_fallback(cli, tmpdir, datafiles): assert not os.path.exists(os.path.join(cache_dir, 'sources')) # Now check if it falls back to the source fetch method. - res = cli.run(project=project_dir, args=['source', 'fetch', 'fetch.bst']) + res = cli.run(project=project_dir, args=['source', 'fetch', element_name]) res.assert_success() brief_key = source._get_brief_display_key() assert ("Remote source service ({}) does not have source {} cached" @@ -172,38 +159,15 @@ def test_fetch_fallback(cli, tmpdir, datafiles): @pytest.mark.datafiles(DATA_DIR) def test_pull_fail(cli, tmpdir, datafiles): project_dir = str(datafiles) + element_name, repo, _ref = create_test_element(tmpdir, project_dir) cache_dir = os.path.join(str(tmpdir), 'cache') with create_artifact_share(os.path.join(str(tmpdir), 'sourceshare')) as share: - user_config_file = str(tmpdir.join('buildstream.conf')) - user_config = { - 'scheduler': { - 'pushers': 1 - }, - 'source-caches': { - 'url': share.repo, - }, - 'cachedir': cache_dir, - } - _yaml.roundtrip_dump(user_config, file=user_config_file) - cli.configure(user_config) - - repo = create_repo('git', str(tmpdir)) - ref = repo.create(os.path.join(project_dir, 'files')) - element_path = os.path.join(project_dir, 'elements') - element_name = 'push.bst' - element = { - 'kind': 'import', - 'sources': [repo.source_config(ref=ref)] - } - _yaml.roundtrip_dump(element, os.path.join(element_path, element_name)) - - # get the source object - with dummy_context(config=user_config_file) as context: + with context_with_source_cache(cli, cache_dir, share, tmpdir) as context: project = Project(project_dir, context) project.ensure_fully_loaded() - element = project.load_elements(['push.bst'])[0] + element = project.load_elements([element_name])[0] assert not element._source_cached() source = list(element.sources())[0] @@ -211,8 +175,55 @@ def test_pull_fail(cli, tmpdir, datafiles): shutil.rmtree(repo.repo) # Should fail in stream, with a plugin task causing the error - res = cli.run(project=project_dir, args=['build', 'push.bst']) + res = cli.run(project=project_dir, args=['build', element_name]) res.assert_main_error(ErrorDomain.STREAM, None) res.assert_task_error(ErrorDomain.PLUGIN, None) assert "Remote source service ({}) does not have source {} cached".format( share.repo, source._get_brief_display_key()) in res.stderr + + +@pytest.mark.datafiles(DATA_DIR) +def test_source_pull_partial_fallback_fetch(cli, tmpdir, datafiles): + project_dir = str(datafiles) + element_name, repo, ref = create_test_element(tmpdir, project_dir) + cache_dir = os.path.join(str(tmpdir), 'cache') + + # use artifact cache for sources for now, they should work the same + with create_artifact_share(os.path.join(str(tmpdir), 'sourceshare')) as share: + with context_with_source_cache(cli, cache_dir, share, tmpdir) as context: + project = Project(project_dir, context) + project.ensure_fully_loaded() + + element = project.load_elements([element_name])[0] + assert not element._source_cached() + source = list(element.sources())[0] + + cas = context.get_cascache() + assert not cas.contains(source._get_source_name()) + + # Just check that we sensibly fetch and build the element + res = cli.run(project=project_dir, args=['build', element_name]) + res.assert_success() + + assert os.listdir(os.path.join(str(tmpdir), 'cache', 'sources', 'git')) != [] + + # get root digest of source + sourcecache = context.sourcecache + digest = sourcecache.export(source)._get_digest() + + move_local_cas_to_remote_source_share(str(cache_dir), share.directory) + + # Remove the cas content, only keep the proto and such around + shutil.rmtree(os.path.join(str(tmpdir), "sourceshare", "repo", "cas", "objects")) + # check the share doesn't have the object + assert not share.has_object(digest) + + state = cli.get_element_state(project_dir, element_name) + assert state == 'fetch needed' + + # Now fetch the source and check + res = cli.run(project=project_dir, args=['source', 'fetch', element_name]) + res.assert_success() + + assert ("SUCCESS Fetching from {}" + .format(repo.source_config(ref=ref)['url'])) in res.stderr |