summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Schubert <contact@benschubert.me>2020-01-29 09:50:41 +0000
committerBenjamin Schubert <contact@benschubert.me>2020-01-29 09:50:41 +0000
commitbd2cd025d675963b50e698a89a608dddf5a68c8a (patch)
tree5bcb9c09c39017873a5e614aede0fb06b74f2bd7
parent1c3baa29dac7ee49a7d6b44d906d419645e22b4d (diff)
parent8ff0d77660d30163016211f5d4b55ca4c6a7cab4 (diff)
downloadbuildstream-bd2cd025d675963b50e698a89a608dddf5a68c8a.tar.gz
Merge branch 'tmewett/git-source' into 'master'jjardon/fedora-30-removal
Improvements to _GitSourceBase and _GitMirror See merge request BuildStream/buildstream!1808
-rw-r--r--NEWS16
-rw-r--r--src/buildstream/_gitsourcebase.py231
-rw-r--r--tests/sources/git.py28
3 files changed, 201 insertions, 74 deletions
diff --git a/NEWS b/NEWS
index 6b6766961..62deefdd5 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,16 @@
(unreleased)
============
+Plugins
+-------
+
+ o The 'git' source will now only fetch a single revision when its ref is set
+ to an exact tag in git-describe format (...-0-g...).
+
+ o When a 'git' source has a ref in git-describe format, only the commit
+ ID is used when calculating the element's cache key. This means you can
+ change between ref formats without having to rebuild the element.
+
==================
buildstream 1.93.0
@@ -89,6 +99,12 @@ CLI
* `bst workspace open --track`
* `bst workspace reset --track`
+Plugins
+-------
+
+ o The 'git' source will now fetch submodules recursively when
+ its 'checkout-submodules' option is enabled.
+
API
---
diff --git a/src/buildstream/_gitsourcebase.py b/src/buildstream/_gitsourcebase.py
index a8253e36f..1f74bb972 100644
--- a/src/buildstream/_gitsourcebase.py
+++ b/src/buildstream/_gitsourcebase.py
@@ -18,6 +18,7 @@
# Authors:
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
# Chandan Singh <csingh43@bloomberg.net>
+# Tom Mewett <tom.mewett@codethink.co.uk>
"""Abstract base class for source implementations that work with a Git repository"""
@@ -36,6 +37,7 @@ from .types import FastEnum
from .utils import move_atomic, DirectoryExistsError
GIT_MODULES = ".gitmodules"
+EXACT_TAG_PATTERN = r"(?P<tag>.*)-0-g(?P<commit>.*)"
# Warnings
WARN_INCONSISTENT_SUBMODULE = "inconsistent-submodule"
@@ -48,6 +50,10 @@ class _RefFormat(FastEnum):
GIT_DESCRIBE = "git-describe"
+def _strip_tag(rev):
+ return rev.split("-g")[-1]
+
+
# This class represents a single Git repository. The Git source needs to account for
# submodules, but we don't want to cache them all under the umbrella of the
# superproject - so we use this class which caches them independently, according
@@ -75,25 +81,16 @@ class _GitMirror(SourceFetcher):
self.primary = primary
self.mirror = os.path.join(source.get_mirror_directory(), utils.url_directory_name(url))
- # Ensures that the mirror exists
- def ensure(self, alias_override=None):
-
- # Unfortunately, git does not know how to only clone just a specific ref,
- # so we have to download all of those gigs even if we only need a couple
- # of bytes.
+ # _ensure_repo():
+ #
+ # Ensures that the Git repository exists at the mirror location and is configured
+ # to fetch from the given URL
+ #
+ def _ensure_repo(self):
if not os.path.exists(self.mirror):
-
- # Do the initial clone in a tmpdir just because we want an atomic move
- # after a long standing clone which could fail overtime, for now do
- # this directly in our git directory, eliminating the chances that the
- # system configured tmpdir is not on the same partition.
- #
with self.source.tempdir() as tmpdir:
- url = self.source.translate_url(self.url, alias_override=alias_override, primary=self.primary)
self.source.call(
- [self.source.host_git, "clone", "--mirror", "-n", url, tmpdir],
- fail="Failed to clone git repository {}".format(url),
- fail_temporarily=True,
+ [self.source.host_git, "init", "--bare", tmpdir], fail="Failed to initialise repository",
)
try:
@@ -101,55 +98,78 @@ class _GitMirror(SourceFetcher):
except DirectoryExistsError:
# Another process was quicker to download this repository.
# Let's discard our own
- self.source.status("{}: Discarding duplicate clone of {}".format(self.source, url))
+ self.source.status("{}: Discarding duplicate repository".format(self.source))
except OSError as e:
raise SourceError(
- "{}: Failed to move cloned git repository {} from '{}' to '{}': {}".format(
- self.source, url, tmpdir, self.mirror, e
+ "{}: Failed to move created repository from '{}' to '{}': {}".format(
+ self.source, tmpdir, self.mirror, e
)
) from e
- def _fetch(self, alias_override=None):
- url = self.source.translate_url(self.url, alias_override=alias_override, primary=self.primary)
+ def _fetch(self, url, fetch_all=False):
+ self._ensure_repo()
+
+ # Work out whether we can fetch a specific tag: are we given a ref which
+ # 1. is in git-describe format
+ # 2. refers to an exact tag (is "...-0-g...")
+ # 3. is available on the remote and tags the specified commit?
+ # And lastly: are we on a new-enough Git which allows cloning from our potentially shallow cache?
+ if fetch_all:
+ pass
+ # Fetching from a shallow-cloned repo was first supported in v1.9.0
+ elif not self.ref or self.source.host_git_version is not None and self.source.host_git_version < (1, 9, 0):
+ fetch_all = True
+ else:
+ m = re.match(EXACT_TAG_PATTERN, self.ref)
+ if m is None:
+ fetch_all = True
+ else:
+ tag = m.group("tag")
+ commit = m.group("commit")
+
+ if not self.remote_has_tag(url, tag, commit):
+ self.source.status(
+ "{}: {} is not advertised on {}. Fetching all Git refs".format(self.source, self.ref, url)
+ )
+ fetch_all = True
+ else:
+ exit_code = self.source.call(
+ [
+ self.source.host_git,
+ "fetch",
+ "--depth=1",
+ url,
+ "+refs/tags/{tag}:refs/tags/{tag}".format(tag=tag),
+ ],
+ cwd=self.mirror,
+ )
+ if exit_code != 0:
+ self.source.status(
+ "{}: Failed to fetch tag '{}' from {}. Fetching all Git refs".format(self.source, tag, url)
+ )
+ fetch_all = True
- if alias_override:
- remote_name = utils.url_directory_name(alias_override)
- _, remotes = self.source.check_output(
- [self.source.host_git, "remote"],
- fail="Failed to retrieve list of remotes in {}".format(self.mirror),
+ if fetch_all:
+ self.source.call(
+ [
+ self.source.host_git,
+ "fetch",
+ "--prune",
+ url,
+ "+refs/heads/*:refs/heads/*",
+ "+refs/tags/*:refs/tags/*",
+ ],
+ fail="Failed to fetch from remote git repository: {}".format(url),
+ fail_temporarily=True,
cwd=self.mirror,
)
- if remote_name not in remotes:
- self.source.call(
- [self.source.host_git, "remote", "add", remote_name, url],
- fail="Failed to add remote {} with url {}".format(remote_name, url),
- cwd=self.mirror,
- )
- else:
- remote_name = "origin"
-
- self.source.call(
- [
- self.source.host_git,
- "fetch",
- remote_name,
- "--prune",
- "+refs/heads/*:refs/heads/*",
- "+refs/tags/*:refs/tags/*",
- ],
- fail="Failed to fetch from remote git repository: {}".format(url),
- fail_temporarily=True,
- cwd=self.mirror,
- )
def fetch(self, alias_override=None): # pylint: disable=arguments-differ
- # Resolve the URL for the message
resolved_url = self.source.translate_url(self.url, alias_override=alias_override, primary=self.primary)
with self.source.timed_activity("Fetching from {}".format(resolved_url), silent_nested=True):
- self.ensure(alias_override)
if not self.has_ref():
- self._fetch(alias_override)
+ self._fetch(resolved_url)
self.assert_ref()
def has_ref(self):
@@ -170,30 +190,76 @@ class _GitMirror(SourceFetcher):
"{}: expected ref '{}' was not found in git repository: '{}'".format(self.source, self.ref, self.url)
)
- def latest_commit_with_tags(self, tracking, track_tags=False):
+ # remote_has_tag():
+ #
+ # Args:
+ # url (str)
+ # tag (str)
+ # commit (str)
+ #
+ # Returns:
+ # (bool): Whether the remote at `url` has the tag `tag` attached to `commit`
+ #
+ def remote_has_tag(self, url, tag, commit):
+ _, ls_remote = self.source.check_output(
+ [self.source.host_git, "ls-remote", url],
+ cwd=self.mirror,
+ fail="Failed to list advertised remote refs from git repository {}".format(url),
+ )
+
+ line = "{commit}\trefs/tags/{tag}".format(commit=commit, tag=tag)
+ return line in ls_remote or line + "^{}" in ls_remote
+
+ # to_commit():
+ #
+ # Args:
+ # rev (str): A Git "commit-ish" rev
+ #
+ # Returns:
+ # (str): The full revision ID of the commit
+ #
+ def to_commit(self, rev):
_, output = self.source.check_output(
- [self.source.host_git, "rev-parse", tracking],
- fail="Unable to find commit for specified branch name '{}'".format(tracking),
+ [self.source.host_git, "rev-list", "-n", "1", rev],
+ fail="Unable to find revision {}".format(rev),
cwd=self.mirror,
)
- ref = output.rstrip("\n")
- if self.source.ref_format == _RefFormat.GIT_DESCRIBE:
- # Prefix the ref with the closest tag, if available,
- # to make the ref human readable
- exit_code, output = self.source.check_output(
- [self.source.host_git, "describe", "--tags", "--abbrev=40", "--long", ref], cwd=self.mirror
- )
- if exit_code == 0:
- ref = output.rstrip("\n")
+ return output.strip()
- if not track_tags:
- return ref, []
+ # describe():
+ #
+ # Args:
+ # rev (str): A Git "commit-ish" rev
+ #
+ # Returns:
+ # (str): The full revision ID of the commit given by rev, prepended
+ # with tag information as given by git-describe (where available)
+ #
+ def describe(self, rev):
+ _, output = self.source.check_output(
+ [self.source.host_git, "describe", "--tags", "--abbrev=40", "--long", "--always", rev],
+ fail="Unable to find revision {}".format(rev),
+ cwd=self.mirror,
+ )
+
+ return output.strip()
+ # reachable_tags():
+ #
+ # Args:
+ # rev (str): A Git "commit-ish" rev
+ #
+ # Returns:
+ # (list): A list of tags in the ancestry of rev. Each entry is a triple of the form
+ # (tag name (str), commit ref (str), annotated (bool)) describing a tag,
+ # its tagged commit and whether it's annotated
+ #
+ def reachable_tags(self, rev):
tags = set()
for options in [[], ["--first-parent"], ["--tags"], ["--tags", "--first-parent"]]:
exit_code, output = self.source.check_output(
- [self.source.host_git, "describe", "--abbrev=0", ref, *options], cwd=self.mirror
+ [self.source.host_git, "describe", "--abbrev=0", rev, *options], cwd=self.mirror
)
if exit_code == 0:
tag = output.strip()
@@ -207,7 +273,7 @@ class _GitMirror(SourceFetcher):
tags.add((tag, commit_ref.strip(), annotated))
- return ref, list(tags)
+ return list(tags)
def stage(self, directory):
fullpath = os.path.join(directory, self.path)
@@ -493,11 +559,24 @@ class _GitSourceBase(Source):
# Check if git is installed, get the binary at the same time
self.host_git = utils.get_host_tool("git")
+ rc, version_str = self.check_output([self.host_git, "--version"])
+ # e.g. on Git for Windows we get "git version 2.21.0.windows.1".
+ # e.g. on Mac via Homebrew we get "git version 2.19.0".
+ if rc == 0:
+ self.host_git_version = tuple(int(x) for x in version_str.split(" ")[2].split(".")[:3])
+ else:
+ self.host_git_version = None
+
def get_unique_key(self):
+ ref = self.mirror.ref
+ if ref is not None:
+ # Strip any (arbitary) tag information, leaving just the commit ID
+ ref = _strip_tag(ref)
+
# Here we want to encode the local name of the repository and
# the ref, if the user changes the alias to fetch the same sources
# from another location, it should not affect the cache key.
- key = [self.original_url, self.mirror.ref]
+ key = [self.original_url, ref]
if self.mirror.tags:
tags = {tag: (commit, annotated) for tag, commit, annotated in self.mirror.tags}
key.append({"tags": tags})
@@ -566,13 +645,15 @@ class _GitSourceBase(Source):
# Resolve the URL for the message
resolved_url = self.translate_url(self.mirror.url)
with self.timed_activity("Tracking {} from {}".format(self.tracking, resolved_url), silent_nested=True):
- self.mirror.ensure()
- self.mirror._fetch()
+ self.mirror._fetch(resolved_url, fetch_all=True)
+
+ ref = self.mirror.to_commit(self.tracking)
+ tags = self.mirror.reachable_tags(ref) if self.track_tags else []
- # Update self.mirror.ref and node.ref from the self.tracking branch
- ret = self.mirror.latest_commit_with_tags(self.tracking, self.track_tags)
+ if self.ref_format == _RefFormat.GIT_DESCRIBE:
+ ref = self.mirror.describe(ref)
- return ret
+ return ref, tags
def init_workspace(self, directory):
with self.timed_activity('Setting up workspace "{}"'.format(directory), silent_nested=True):
@@ -643,8 +724,10 @@ class _GitSourceBase(Source):
)
# Assert that the ref exists in the track tag/branch, if track has been specified.
+ # Also don't do this check if an exact tag ref is given, as we probably didn't fetch
+ # any branch refs
ref_in_track = False
- if self.tracking:
+ if not re.match(EXACT_TAG_PATTERN, self.mirror.ref) and self.tracking:
_, branch = self.check_output(
[self.host_git, "branch", "--list", self.tracking, "--contains", self.mirror.ref],
cwd=self.mirror.mirror,
diff --git a/tests/sources/git.py b/tests/sources/git.py
index 25ec49347..033db1bf9 100644
--- a/tests/sources/git.py
+++ b/tests/sources/git.py
@@ -60,6 +60,34 @@ def test_fetch_bad_ref(cli, tmpdir, datafiles):
@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
+@pytest.mark.skipif(HAVE_OLD_GIT, reason="old git cannot clone a shallow repo to stage the source")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, "template"))
+def test_fetch_shallow(cli, tmpdir, datafiles):
+ project = str(datafiles)
+ workspacedir = os.path.join(str(tmpdir), "workspace")
+
+ # Create the repo from 'repofiles' subdir
+ repo = create_repo("git", str(tmpdir))
+ repo.create(os.path.join(project, "repofiles"))
+ first_commit = repo.latest_commit()
+ repo.add_commit()
+ repo.add_tag("tag")
+
+ ref = "tag-0-g" + repo.latest_commit()
+
+ element = {"kind": "import", "sources": [repo.source_config(ref=ref)]}
+ generate_element(project, "target.bst", element)
+
+ result = cli.run(project=project, args=["source", "fetch", "target.bst"])
+ result.assert_success()
+ result = cli.run(project=project, args=["workspace", "open", "--directory", workspacedir, "target.bst"])
+ result.assert_success()
+
+ assert subprocess.call(["git", "show", "tag"], cwd=workspacedir) == 0
+ assert subprocess.call(["git", "show", first_commit], cwd=workspacedir) != 0
+
+
+@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
@pytest.mark.datafiles(os.path.join(DATA_DIR, "template"))
def test_submodule_fetch_checkout(cli, tmpdir, datafiles):
project = str(datafiles)