summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--buildstream/_gitsourcebase.py145
-rw-r--r--tests/sources/git.py247
2 files changed, 372 insertions, 20 deletions
diff --git a/buildstream/_gitsourcebase.py b/buildstream/_gitsourcebase.py
index 7d07c56cb..f42431a72 100644
--- a/buildstream/_gitsourcebase.py
+++ b/buildstream/_gitsourcebase.py
@@ -49,7 +49,7 @@ WARN_INVALID_SUBMODULE = "invalid-submodule"
#
class _GitMirror(SourceFetcher):
- def __init__(self, source, path, url, ref, *, primary=False, tags=[]):
+ def __init__(self, source, path, url, ref, *, primary=False, tags=[], tracking=None):
super().__init__()
self.source = source
@@ -58,11 +58,101 @@ class _GitMirror(SourceFetcher):
self.ref = ref
self.tags = tags
self.primary = primary
+ dirname = utils.url_directory_name(url)
self.mirror = os.path.join(source.get_mirror_directory(), utils.url_directory_name(url))
+ self.fetch_mirror = os.path.join(source.get_mirror_directory(), '{}-{}'.format(dirname, ref))
self.mark_download_url(url)
+ self.tracking = tracking
+
+ def mirror_path(self):
+ if os.path.exists(self.mirror):
+ return self.mirror
+ else:
+ assert os.path.exists(self.fetch_mirror)
+ return self.fetch_mirror
+
+ def ensure_fetchable(self, alias_override=None):
+
+ if os.path.exists(self.mirror):
+ return
+
+ if self.tags:
+ for tag, commit, _ in self.tags:
+ if commit != self.ref:
+ self.source.status("{}: tag '{}' is not on commit '{}', so a full clone is required"
+ .format(self.source, tag, commit))
+ self.ensure_trackable(alias_override=alias_override)
+ return
+
+ if os.path.exists(self.fetch_mirror):
+ return
+
+ with self.source.tempdir() as tmpdir:
+ self.source.call([self.source.host_git, 'init', '--bare', tmpdir],
+ fail="Failed to init git repository",
+ fail_temporarily=True)
+
+ url = self.source.translate_url(self.url, alias_override=alias_override,
+ primary=self.primary)
+
+ self.source.call([self.source.host_git, 'remote', 'add', '--mirror=fetch', 'origin', url],
+ cwd=tmpdir,
+ fail="Failed to init git repository",
+ fail_temporarily=True)
+
+ _, refs = self.source.check_output([self.source.host_git, 'ls-remote', 'origin'],
+ cwd=tmpdir,
+ fail="Failed to clone git repository {}".format(url),
+ fail_temporarily=True)
+
+ advertised = None
+ for ref_line in refs.splitlines():
+ commit, ref = ref_line.split('\t', 1)
+ if ref == 'HEAD':
+ continue
+ if self.tracking:
+ # For validate_cache to work
+ if ref not in ['refs/heads/{}'.format(self.tracking),
+ 'refs/tags/{}'.format(self.tracking),
+ 'refs/tags/{}{}'.format(self.tracking, '^{}')]:
+ continue
+ if self.ref == commit:
+ if ref.endswith('^{}'):
+ ref = ref[:-3]
+ advertised = ref
+ break
+
+ if advertised is None:
+ self.source.status("{}: {} is not advertised on {}, so a full clone is required"
+ .format(self.source, self.ref, url))
+
+ self.ensure_trackable(alias_override=alias_override)
+ return
+
+ self.source.call([self.source.host_git, 'fetch', '--depth=1', 'origin', advertised],
+ cwd=tmpdir,
+ fail="Failed to fetch repository",
+ fail_temporarily=True)
+
+ # We need to have a ref to make it clonable
+ self.source.call([self.source.host_git, 'update-ref', 'HEAD', self.ref],
+ cwd=tmpdir,
+ fail="Failed to tag HEAD",
+ fail_temporarily=True)
+
+ try:
+ move_atomic(tmpdir, self.fetch_mirror)
+ except DirectoryExistsError:
+ # Another process was quicker to download this repository.
+ # Let's discard our own
+ self.source.status("{}: Discarding duplicate clone of {}"
+ .format(self.source, url))
+ except OSError as e:
+ raise SourceError("{}: Failed to move cloned git repository {} from '{}' to '{}': {}"
+ .format(self.source, url, tmpdir, self.fetch_mirror, e)) from e
# Ensures that the mirror exists
- def ensure(self, alias_override=None):
+ def ensure_trackable(self, alias_override=None):
# Unfortunately, git does not know how to only clone just a specific ref,
# so we have to download all of those gigs even if we only need a couple
@@ -97,18 +187,20 @@ class _GitMirror(SourceFetcher):
alias_override=alias_override,
primary=self.primary)
+ mirror = self.mirror_path()
+
if alias_override:
remote_name = utils.url_directory_name(alias_override)
_, remotes = self.source.check_output(
[self.source.host_git, 'remote'],
- fail="Failed to retrieve list of remotes in {}".format(self.mirror),
- cwd=self.mirror
+ fail="Failed to retrieve list of remotes in {}".format(mirror),
+ cwd=mirror
)
if remote_name not in remotes:
self.source.call(
[self.source.host_git, 'remote', 'add', remote_name, url],
fail="Failed to add remote {} with url {}".format(remote_name, url),
- cwd=self.mirror
+ cwd=mirror
)
else:
remote_name = "origin"
@@ -117,7 +209,7 @@ class _GitMirror(SourceFetcher):
'+refs/heads/*:refs/heads/*', '+refs/tags/*:refs/tags/*'],
fail="Failed to fetch from remote git repository: {}".format(url),
fail_temporarily=True,
- cwd=self.mirror)
+ cwd=mirror)
def fetch(self, alias_override=None):
# Resolve the URL for the message
@@ -128,7 +220,7 @@ class _GitMirror(SourceFetcher):
with self.source.timed_activity("Fetching from {}"
.format(resolved_url),
silent_nested=True):
- self.ensure(alias_override)
+ self.ensure_fetchable(alias_override)
if not self.has_ref():
self._fetch(alias_override)
self.assert_ref()
@@ -137,12 +229,14 @@ class _GitMirror(SourceFetcher):
if not self.ref:
return False
- # If the mirror doesnt exist, we also dont have the ref
- if not os.path.exists(self.mirror):
+ if not os.path.exists(self.mirror) and not os.path.exists(self.fetch_mirror):
+ # If the mirror doesnt exist, we also dont have the ref
return False
+ mirror = self.mirror_path()
+
# Check if the ref is really there
- rc = self.source.call([self.source.host_git, 'cat-file', '-t', self.ref], cwd=self.mirror)
+ rc = self.source.call([self.source.host_git, 'cat-file', '-t', self.ref], cwd=mirror)
return rc == 0
def assert_ref(self):
@@ -192,11 +286,13 @@ class _GitMirror(SourceFetcher):
def stage(self, directory):
fullpath = os.path.join(directory, self.path)
+ mirror = self.mirror_path()
+
# Using --shared here avoids copying the objects into the checkout, in any
# case we're just checking out a specific commit and then removing the .git/
# directory.
- self.source.call([self.source.host_git, 'clone', '--no-checkout', '--shared', self.mirror, fullpath],
- fail="Failed to create git mirror {} in directory: {}".format(self.mirror, fullpath),
+ self.source.call([self.source.host_git, 'clone', '--no-checkout', '--shared', mirror, fullpath],
+ fail="Failed to create git mirror {} in directory: {}".format(mirror, fullpath),
fail_temporarily=True)
self.source.call([self.source.host_git, 'checkout', '--force', self.ref],
@@ -226,9 +322,11 @@ class _GitMirror(SourceFetcher):
# List the submodules (path/url tuples) present at the given ref of this repo
def submodule_list(self):
+ mirror = self.mirror_path()
+
modules = "{}:{}".format(self.ref, GIT_MODULES)
exit_code, output = self.source.check_output(
- [self.source.host_git, 'show', modules], cwd=self.mirror)
+ [self.source.host_git, 'show', modules], cwd=mirror)
# If git show reports error code 128 here, we take it to mean there is
# no .gitmodules file to display for the given revision.
@@ -256,6 +354,8 @@ class _GitMirror(SourceFetcher):
# Fetch the ref which this mirror requires its submodule to have,
# at the given ref of this mirror.
def submodule_ref(self, submodule, ref=None):
+ mirror = self.mirror_path()
+
if not ref:
ref = self.ref
@@ -264,7 +364,7 @@ class _GitMirror(SourceFetcher):
_, output = self.source.check_output([self.source.host_git, 'ls-tree', ref, submodule],
fail="ls-tree failed for commit {} and submodule: {}".format(
ref, submodule),
- cwd=self.mirror)
+ cwd=mirror)
# read the commit hash from the output
fields = output.split()
@@ -392,8 +492,11 @@ class _GitSourceBase(Source):
self.track_tags = self.node_get_member(node, bool, 'track-tags', False)
self.original_url = self.node_get_member(node, str, 'url')
- self.mirror = self.BST_MIRROR_CLASS(self, '', self.original_url, ref, tags=tags, primary=True)
self.tracking = self.node_get_member(node, str, 'track', None)
+ self.mirror = self.BST_MIRROR_CLASS(self, '', self.original_url, ref,
+ tags=tags,
+ primary=True,
+ tracking=self.tracking)
self.ref_format = self.node_get_member(node, str, 'ref-format', 'sha1')
if self.ref_format not in ['sha1', 'git-describe']:
@@ -511,7 +614,7 @@ class _GitSourceBase(Source):
with self.timed_activity("Tracking {} from {}"
.format(self.tracking, resolved_url),
silent_nested=True):
- self.mirror.ensure()
+ self.mirror.ensure_trackable()
self.mirror._fetch()
# Update self.mirror.ref and node.ref from the self.tracking branch
@@ -521,6 +624,7 @@ class _GitSourceBase(Source):
def init_workspace(self, directory):
# XXX: may wish to refactor this as some code dupe with stage()
+ self.mirror.ensure_trackable()
self._refresh_submodules()
with self.timed_activity('Setting up workspace "{}"'.format(directory), silent_nested=True):
@@ -595,15 +699,16 @@ class _GitSourceBase(Source):
# Assert that the ref exists in the track tag/branch, if track has been specified.
ref_in_track = False
if self.tracking:
+ mirror = self.mirror.mirror_path()
_, branch = self.check_output([self.host_git, 'branch', '--list', self.tracking,
'--contains', self.mirror.ref],
- cwd=self.mirror.mirror)
+ cwd=mirror)
if branch:
ref_in_track = True
else:
_, tag = self.check_output([self.host_git, 'tag', '--list', self.tracking,
'--contains', self.mirror.ref],
- cwd=self.mirror.mirror)
+ cwd=mirror)
if tag:
ref_in_track = True
@@ -628,7 +733,7 @@ class _GitSourceBase(Source):
self._refresh_submodules()
for mirror in self.submodules:
- if not os.path.exists(mirror.mirror):
+ if not os.path.exists(mirror.mirror) and not os.path.exists(mirror.fetch_mirror):
return False
if not mirror.has_ref():
return False
@@ -640,7 +745,7 @@ class _GitSourceBase(Source):
# Assumes that we have our mirror and we have the ref which we point to
#
def _refresh_submodules(self):
- self.mirror.ensure()
+ self.mirror.ensure_fetchable()
submodules = []
for path, url in self.mirror.submodule_list():
diff --git a/tests/sources/git.py b/tests/sources/git.py
index b7b175ee6..43cf95ed5 100644
--- a/tests/sources/git.py
+++ b/tests/sources/git.py
@@ -34,6 +34,7 @@ from buildstream import _yaml
from buildstream.plugin import CoreWarnings
from buildstream.testing import cli # pylint: disable=unused-import
from buildstream.testing import create_repo
+from buildstream.utils import url_directory_name
from tests.testutils.site import HAVE_GIT, HAVE_OLD_GIT
@@ -1225,3 +1226,249 @@ def test_overwrite_rogue_tag_multiple_remotes(cli, tmpdir, datafiles):
result = cli.run(project=project, args=['build', 'target.bst'])
result.assert_success()
+
+
+@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
+def test_fetch_shallow(cli, tmpdir, datafiles):
+ project = str(datafiles)
+
+ repo = create_repo('git', str(tmpdir))
+ previous_ref = repo.create(os.path.join(project, 'repofiles'))
+
+ file1 = os.path.join(str(tmpdir), 'file1')
+ with open(file1, 'w') as f:
+ f.write('test\n')
+ ref = repo.add_file(file1)
+
+ source_config = repo.source_config(ref=ref)
+
+ # Write out our test target with a bad ref
+ element = {
+ 'kind': 'import',
+ 'sources': [
+ source_config
+ ]
+ }
+ _yaml.dump(element, os.path.join(project, 'target.bst'))
+
+ sources_dir = os.path.join(str(tmpdir), 'sources')
+ os.makedirs(sources_dir, exist_ok=True)
+ config = {
+ 'sourcedir': sources_dir
+ }
+ cli.configure(config)
+
+ result = cli.run(project=project, args=[
+ 'source', 'fetch', 'target.bst'
+ ])
+ result.assert_success()
+
+ cache_dir_name = url_directory_name(source_config['url'])
+ full_cache_path = os.path.join(sources_dir, 'git', cache_dir_name)
+ shallow_cache_path = os.path.join(sources_dir, 'git', '{}-{}'.format(cache_dir_name, ref))
+
+ assert os.path.exists(shallow_cache_path)
+ assert not os.path.exists(full_cache_path)
+
+ output = subprocess.run(['git', 'log', '--format=format:%H'],
+ cwd=shallow_cache_path,
+ stdout=subprocess.PIPE).stdout.decode('ascii')
+ assert output.splitlines() == [ref]
+
+ result = cli.run(project=project, args=[
+ 'build', 'target.bst'
+ ])
+ result.assert_success()
+
+ output = subprocess.run(['git', 'log', '--format=format:%H'],
+ cwd=shallow_cache_path,
+ stdout=subprocess.PIPE).stdout.decode('ascii')
+ assert output.splitlines() == [ref]
+
+ assert os.path.exists(shallow_cache_path)
+ assert not os.path.exists(full_cache_path)
+
+ result = cli.run(project=project, args=[
+ 'source', 'track', 'target.bst'
+ ])
+ result.assert_success()
+
+ assert os.path.exists(full_cache_path)
+ output = subprocess.run(['git', 'log', '--format=format:%H'],
+ cwd=full_cache_path,
+ stdout=subprocess.PIPE).stdout.decode('ascii')
+ assert output.splitlines() == [ref, previous_ref]
+
+
+@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
+def test_fetch_shallow_not_tagged(cli, tmpdir, datafiles):
+ """When a ref is not tagged and not head of branch on remote we cannot
+ get a shallow clone. It should automatically get a full clone.
+ """
+
+ project = str(datafiles)
+
+ repo = create_repo('git', str(tmpdir))
+ previous_ref = repo.create(os.path.join(project, 'repofiles'))
+
+ file1 = os.path.join(str(tmpdir), 'file1')
+ with open(file1, 'w') as f:
+ f.write('test\n')
+ ref = repo.add_file(file1)
+
+ source_config = repo.source_config(ref=previous_ref)
+
+ # Write out our test target with a bad ref
+ element = {
+ 'kind': 'import',
+ 'sources': [
+ source_config
+ ]
+ }
+ _yaml.dump(element, os.path.join(project, 'target.bst'))
+
+ sources_dir = os.path.join(str(tmpdir), 'sources')
+ os.makedirs(sources_dir, exist_ok=True)
+ config = {
+ 'sourcedir': sources_dir
+ }
+ cli.configure(config)
+
+ result = cli.run(project=project, args=[
+ 'source', 'fetch', 'target.bst'
+ ])
+ result.assert_success()
+
+ cache_dir_name = url_directory_name(source_config['url'])
+ full_cache_path = os.path.join(sources_dir, 'git', cache_dir_name)
+ shallow_cache_path = os.path.join(sources_dir, 'git', '{}-{}'.format(cache_dir_name, previous_ref))
+
+ assert not os.path.exists(shallow_cache_path)
+ assert os.path.exists(full_cache_path)
+
+ output = subprocess.run(['git', 'log', '--format=format:%H'],
+ cwd=full_cache_path,
+ stdout=subprocess.PIPE).stdout.decode('ascii')
+ assert output.splitlines() == [ref, previous_ref]
+
+
+@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
+def test_fetch_shallow_annotated_tag(cli, tmpdir, datafiles):
+ """When a ref is not tagged and not head of branch on remote we cannot
+ get a shallow clone. It should automatically get a full clone.
+ """
+
+ project = str(datafiles)
+
+ repo = create_repo('git', str(tmpdir))
+ previous_ref = repo.create(os.path.join(project, 'repofiles'))
+
+ repo.add_annotated_tag('tag', 'tag')
+
+ file1 = os.path.join(str(tmpdir), 'file1')
+ with open(file1, 'w') as f:
+ f.write('test\n')
+ repo.add_file(file1)
+
+ source_config = repo.source_config(ref=previous_ref)
+ del source_config['track']
+
+ # Write out our test target with a bad ref
+ element = {
+ 'kind': 'import',
+ 'sources': [
+ source_config
+ ]
+ }
+ _yaml.dump(element, os.path.join(project, 'target.bst'))
+
+ sources_dir = os.path.join(str(tmpdir), 'sources')
+ os.makedirs(sources_dir, exist_ok=True)
+ config = {
+ 'sourcedir': sources_dir
+ }
+ cli.configure(config)
+
+ result = cli.run(project=project, args=[
+ 'source', 'fetch', 'target.bst'
+ ])
+ result.assert_success()
+
+ cache_dir_name = url_directory_name(source_config['url'])
+ full_cache_path = os.path.join(sources_dir, 'git', cache_dir_name)
+ shallow_cache_path = os.path.join(sources_dir, 'git', '{}-{}'.format(cache_dir_name, previous_ref))
+
+ assert os.path.exists(shallow_cache_path)
+ assert not os.path.exists(full_cache_path)
+
+ output = subprocess.run(['git', 'log', '--format=format:%H'],
+ cwd=shallow_cache_path,
+ stdout=subprocess.PIPE).stdout.decode('ascii')
+ assert output.splitlines() == [previous_ref]
+
+
+@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
+def test_fetch_shallow_workspace_open(cli, tmpdir, datafiles):
+ """
+ Workspaces should get a full clone.
+ """
+ project = str(datafiles)
+
+ repo = create_repo('git', str(tmpdir))
+ previous_ref = repo.create(os.path.join(project, 'repofiles'))
+
+ file1 = os.path.join(str(tmpdir), 'file1')
+ with open(file1, 'w') as f:
+ f.write('test\n')
+ ref = repo.add_file(file1)
+
+ source_config = repo.source_config(ref=ref)
+
+ # Write out our test target with a bad ref
+ element = {
+ 'kind': 'import',
+ 'sources': [
+ source_config
+ ]
+ }
+ _yaml.dump(element, os.path.join(project, 'target.bst'))
+
+ sources_dir = os.path.join(str(tmpdir), 'sources')
+ os.makedirs(sources_dir, exist_ok=True)
+ config = {
+ 'sourcedir': sources_dir
+ }
+ cli.configure(config)
+
+ result = cli.run(project=project, args=[
+ 'source', 'fetch', 'target.bst'
+ ])
+ result.assert_success()
+
+ cache_dir_name = url_directory_name(source_config['url'])
+ full_cache_path = os.path.join(sources_dir, 'git', cache_dir_name)
+ shallow_cache_path = os.path.join(sources_dir, 'git', '{}-{}'.format(cache_dir_name, ref))
+
+ assert os.path.exists(shallow_cache_path)
+ assert not os.path.exists(full_cache_path)
+
+ output = subprocess.run(['git', 'log', '--format=format:%H'],
+ cwd=shallow_cache_path,
+ stdout=subprocess.PIPE).stdout.decode('ascii')
+ assert output.splitlines() == [ref]
+
+ workspace = os.path.join(str(tmpdir), 'workspace')
+
+ result = cli.run(project=project, args=[
+ 'workspace', 'open', 'target.bst', '--directory', workspace
+ ])
+ result.assert_success()
+
+ output = subprocess.run(['git', 'log', '--format=format:%H'],
+ cwd=workspace,
+ stdout=subprocess.PIPE).stdout.decode('ascii')
+ assert output.splitlines() == [ref, previous_ref]