diff options
author | Benjamin Schubert <contact@benschubert.me> | 2019-12-16 11:33:11 +0000 |
---|---|---|
committer | Benjamin Schubert <contact@benschubert.me> | 2019-12-16 11:33:11 +0000 |
commit | 0118773753c571aec6c153d30f85c87e0d501d2c (patch) | |
tree | 49928a955ada4a73ecba4bc279db9805b509f607 | |
parent | 2fc5a5cfbcf5c16407914849ea1a99bfdb71cb0e (diff) | |
parent | 7fe2bb4bd2f28f49567100eaf388685e53ad4bf3 (diff) | |
download | buildstream-0118773753c571aec6c153d30f85c87e0d501d2c.tar.gz |
Merge branch 'tmewett/recursive-git' into 'master'
Make Git source plugin clone submodules recursively
Closes #1162
See merge request BuildStream/buildstream!1765
-rw-r--r-- | src/buildstream/_gitsourcebase.py | 153 | ||||
-rw-r--r-- | src/buildstream/plugins/sources/git.py | 8 | ||||
-rw-r--r-- | tests/sources/git.py | 40 |
3 files changed, 133 insertions, 68 deletions
diff --git a/src/buildstream/_gitsourcebase.py b/src/buildstream/_gitsourcebase.py index 4e9e59161..5b7f051b9 100644 --- a/src/buildstream/_gitsourcebase.py +++ b/src/buildstream/_gitsourcebase.py @@ -48,9 +48,20 @@ class _RefFormat(FastEnum): GIT_DESCRIBE = "git-describe" -# Because of handling of submodules, we maintain a _GitMirror -# for the primary git source and also for each submodule it -# might have at a given time +# This class represents a single Git repository. The Git source needs to account for +# submodules, but we don't want to cache them all under the umbrella of the +# superproject - so we use this class which caches them independently, according +# to their URL. Instances keep reference to their "parent" GitSourceBase, +# and if applicable, where in the superproject they are found. +# +# Args: +# source (_GitSourceBase or subclass): The parent source +# path (str): The relative location of the submodule in the superproject; +# the empty string for the superproject itself +# url (str): Where to clone the repo from +# ref (str): Specified 'ref' from the source configuration +# primary (bool): Whether this is the primary URL for the source +# tags (list): Tag configuration; see _GitSourceBase._load_tags # class _GitMirror(SourceFetcher): def __init__(self, source, path, url, ref, *, primary=False, tags=[]): @@ -63,7 +74,6 @@ class _GitMirror(SourceFetcher): self.tags = tags self.primary = primary self.mirror = os.path.join(source.get_mirror_directory(), utils.url_directory_name(url)) - self.mark_download_url(url) # Ensures that the mirror exists def ensure(self, alias_override=None): @@ -244,6 +254,19 @@ class _GitMirror(SourceFetcher): cwd=fullpath, ) + # get_submodule_mirrors(): + # + # Returns: + # An iterator through new instances of this class, one of each submodule + # in the repo + # + def get_submodule_mirrors(self): + for path, url in self.submodule_list(): + ref = self.submodule_ref(path) + if ref is not None: + mirror = self.__class__(self.source, os.path.join(self.path, path), url, ref) + yield mirror + # List the submodules (path/url tuples) present at the given ref of this repo def submodule_list(self): modules = "{}:{}".format(self.ref, GIT_MODULES) @@ -445,7 +468,6 @@ class _GitSourceBase(Source): ) self.checkout_submodules = node.get_bool("checkout-submodules", default=True) - self.submodules = [] # Parse a dict of submodule overrides, stored in the submodule_overrides # and submodule_checkout_overrides dictionaries. @@ -554,34 +576,27 @@ class _GitSourceBase(Source): return ret def init_workspace(self, directory): - # XXX: may wish to refactor this as some code dupe with stage() - self._refresh_submodules() - with self.timed_activity('Setting up workspace "{}"'.format(directory), silent_nested=True): self.mirror.init_workspace(directory) - for mirror in self.submodules: + for mirror in self._recurse_submodules(configure=True): mirror.init_workspace(directory) def stage(self, directory): - - # Need to refresh submodule list here again, because - # it's possible that we did not load in the main process - # with submodules present (source needed fetching) and - # we may not know about the submodule yet come time to build. - # - self._refresh_submodules() - # Stage the main repo in the specified directory # with self.timed_activity("Staging {}".format(self.mirror.url), silent_nested=True): self.mirror.stage(directory) - for mirror in self.submodules: + for mirror in self._recurse_submodules(configure=True): mirror.stage(directory) def get_source_fetchers(self): + self.mirror.mark_download_url(self.mirror.url) yield self.mirror - self._refresh_submodules() - for submodule in self.submodules: + # _recurse_submodules only iterates those which are known at the current + # cached state - but fetch is called on each result as we go, so this will + # yield all configured submodules + for submodule in self._recurse_submodules(configure=True): + submodule.mark_download_url(submodule.url) yield submodule def validate_cache(self): @@ -589,14 +604,13 @@ class _GitSourceBase(Source): unlisted_submodules = [] invalid_submodules = [] - for path, url in self.mirror.submodule_list(): - discovered_submodules[path] = url - if self._ignore_submodule(path): + for submodule in self._recurse_submodules(configure=False): + discovered_submodules[submodule.path] = submodule.url + if self._ignoring_submodule(submodule.path): continue - override_url = self.submodule_overrides.get(path) - if not override_url: - unlisted_submodules.append((path, url)) + if submodule.path not in self.submodule_overrides: + unlisted_submodules.append((submodule.path, submodule.url)) # Warn about submodules which are explicitly configured but do not exist for path, url in self.submodule_overrides.items(): @@ -668,44 +682,50 @@ class _GitSourceBase(Source): ########################################################### def _have_all_refs(self): - if not self.mirror.has_ref(): - return False - - self._refresh_submodules() - for mirror in self.submodules: - if not os.path.exists(mirror.mirror): - return False - if not mirror.has_ref(): - return False - - return True + return self.mirror.has_ref() and all( + submodule.has_ref() for submodule in self._recurse_submodules(configure=True) + ) - # Refreshes the BST_MIRROR_CLASS objects for submodules + # _configure_submodules(): # - # Assumes that we have our mirror and we have the ref which we point to + # Args: + # submodules: An iterator of _GitMirror (or similar) objects for submodules # - def _refresh_submodules(self): - self.mirror.ensure() - submodules = [] - - for path, url in self.mirror.submodule_list(): - - # Completely ignore submodules which are disabled for checkout - if self._ignore_submodule(path): + # Returns: + # An iterator through `submodules` but filtered of any ignored submodules + # and modified to use any custom URLs configured in the source + # + def _configure_submodules(self, submodules): + for submodule in submodules: + if self._ignoring_submodule(submodule.path): continue + # Allow configuration to override the upstream location of the submodules. + submodule.url = self.submodule_overrides.get(submodule.path, submodule.url) + yield submodule - # Allow configuration to override the upstream - # location of the submodules. - override_url = self.submodule_overrides.get(path) - if override_url: - url = override_url + # _recurse_submodules(): + # + # Recursively iterates through GitMirrors for submodules of the main repo. Only + # submodules that are cached are recursed into - but this is decided at + # iteration time, so you can fetch in a for loop over this function to fetch + # all submodules. + # + # Args: + # configure (bool): Whether to apply the 'submodule' config while recursing + # (URL changing and 'checkout' overrides) + # + def _recurse_submodules(self, configure): + def recurse(mirror): + submodules = mirror.get_submodule_mirrors() + if configure: + submodules = self._configure_submodules(submodules) - ref = self.mirror.submodule_ref(path) - if ref is not None: - mirror = self.BST_MIRROR_CLASS(self, path, url, ref) - submodules.append(mirror) + for submodule in submodules: + yield submodule + if submodule.has_ref(): + yield from recurse(submodule) - self.submodules = submodules + yield from recurse(self.mirror) def _load_tags(self, node): tags = [] @@ -717,12 +737,13 @@ class _GitSourceBase(Source): tags.append((tag, commit_ref, annotated)) return tags - # Checks whether the plugin configuration has explicitly - # configured this submodule to be ignored - def _ignore_submodule(self, path): - try: - checkout = self.submodule_checkout_overrides[path] - except KeyError: - checkout = self.checkout_submodules - - return not checkout + # _ignoring_submodule(): + # + # Args: + # path (str): The path of a submodule in the superproject + # + # Returns: + # (bool): Whether to not clone/checkout this submodule + # + def _ignoring_submodule(self, path): + return not self.submodule_checkout_overrides.get(path, self.checkout_submodules) diff --git a/src/buildstream/plugins/sources/git.py b/src/buildstream/plugins/sources/git.py index 5e6834979..d04ccfe1b 100644 --- a/src/buildstream/plugins/sources/git.py +++ b/src/buildstream/plugins/sources/git.py @@ -57,6 +57,7 @@ git - stage files from a git repository ref: d63cbb6fdc0bbdadc4a1b92284826a6d63a7ebcd # Optionally specify whether submodules should be checked-out. + # This is done recursively, as with `git clone --recurse-submodules`. # If not set, this will default to 'True' checkout-submodules: True @@ -66,12 +67,15 @@ git - stage files from a git repository # especially handy when used with project defined aliases which # can be redefined at a later time. # You may also explicitly specify whether to check out this - # submodule. If 'checkout' is set, it will override - # 'checkout-submodules' with the value set below. + # submodule. If 'checkout' is set, it will control whether to + # checkout that submodule and recurse into it. It defaults to the + # value of 'checkout-submodules'. submodules: plugins/bar: url: upstream:bar.git checkout: True + plugins/bar/quux: + checkout: False plugins/baz: url: upstream:baz.git checkout: False diff --git a/tests/sources/git.py b/tests/sources/git.py index e4252fe51..096338bbe 100644 --- a/tests/sources/git.py +++ b/tests/sources/git.py @@ -94,6 +94,46 @@ def test_submodule_fetch_checkout(cli, tmpdir, datafiles): @pytest.mark.skipif(HAVE_GIT is False, reason="git is not available") @pytest.mark.datafiles(os.path.join(DATA_DIR, "template")) +def test_recursive_submodule_fetch_checkout(cli, tmpdir, datafiles): + project = str(datafiles) + checkoutdir = os.path.join(str(tmpdir), "checkout") + + # Create a submodule from the 'othersubrepofiles' subdir + subsubrepo = create_repo("git", str(tmpdir), "subsubrepo") + subsubrepo.create(os.path.join(project, "othersubrepofiles")) + + # Create another submodule from the 'subrepofiles' subdir + subrepo = create_repo("git", str(tmpdir), "subrepo") + subrepo.create(os.path.join(project, "subrepofiles")) + + # Create the repo from 'repofiles' subdir + repo = create_repo("git", str(tmpdir)) + repo.create(os.path.join(project, "repofiles")) + + # Configure submodules + subrepo.add_submodule("subdir", "file://" + subsubrepo.repo) + ref = repo.add_submodule("subdir", "file://" + subrepo.repo) + + # Write out our test target + element = {"kind": "import", "sources": [repo.source_config(ref=ref)]} + _yaml.roundtrip_dump(element, os.path.join(project, "target.bst")) + + # Fetch, build, checkout + result = cli.run(project=project, args=["source", "fetch", "target.bst"]) + result.assert_success() + result = cli.run(project=project, args=["build", "target.bst"]) + result.assert_success() + result = cli.run(project=project, args=["artifact", "checkout", "target.bst", "--directory", checkoutdir]) + result.assert_success() + + # Assert we checked out all files at their expected location + assert os.path.exists(os.path.join(checkoutdir, "file.txt")) + assert os.path.exists(os.path.join(checkoutdir, "subdir", "ponyfile.txt")) + assert os.path.exists(os.path.join(checkoutdir, "subdir", "subdir", "unicornfile.txt")) + + +@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available") +@pytest.mark.datafiles(os.path.join(DATA_DIR, "template")) def test_submodule_fetch_source_enable_explicit(cli, tmpdir, datafiles): project = str(datafiles) checkoutdir = os.path.join(str(tmpdir), "checkout") |