summaryrefslogtreecommitdiff
path: root/buildstream/plugins
diff options
context:
space:
mode:
authorChandan Singh <csingh43@bloomberg.net>2018-12-14 18:36:30 +0000
committerChandan Singh <csingh43@bloomberg.net>2018-12-20 12:50:02 +0000
commit9835b7f145686f71f0a132d8e1c19ce4ad6f0b3c (patch)
tree72d5d31a86a863071bba033bc50c19686e24fa78 /buildstream/plugins
parent77d8ad458ce457aa176ff2d101e12690c31f2a75 (diff)
downloadbuildstream-9835b7f145686f71f0a132d8e1c19ce4ad6f0b3c.tar.gz
Expose base class for Git source plugins
Add a `_GitSourceBase` class to act as the base class for deriving source plugins that work with Git. The first user of this base class is the `git` source plugin that's in core at the moment. The contents of this base class is almost identical to the existing `GitSource` class. The two notable differences are: - the private methods now have leading underscores, as some were missing it originally - the name of the class Note that we are exposing a private member here as we expect it to move to a separate package soon. See the following discussion for more details: https://gitlab.com/BuildStream/buildstream/issues/739#note_124819869 Fixes #739.
Diffstat (limited to 'buildstream/plugins')
-rw-r--r--buildstream/plugins/sources/git.py647
1 files changed, 3 insertions, 644 deletions
diff --git a/buildstream/plugins/sources/git.py b/buildstream/plugins/sources/git.py
index 74d632b6d..4160797d6 100644
--- a/buildstream/plugins/sources/git.py
+++ b/buildstream/plugins/sources/git.py
@@ -156,652 +156,11 @@ This plugin also utilises the following configurable :class:`core warnings <buil
found in the provided track in the element's git repository.
"""
-import os
-import re
-import shutil
-from collections.abc import Mapping
-from io import StringIO
-from tempfile import TemporaryFile
+from buildstream import _GitSourceBase
-from configparser import RawConfigParser
-from buildstream import Source, SourceError, Consistency, SourceFetcher, CoreWarnings
-from buildstream import utils
-from buildstream.utils import move_atomic, DirectoryExistsError
-
-GIT_MODULES = '.gitmodules'
-
-# Warnings
-WARN_INCONSISTENT_SUBMODULE = "inconsistent-submodule"
-WARN_UNLISTED_SUBMODULE = "unlisted-submodule"
-WARN_INVALID_SUBMODULE = "invalid-submodule"
-
-
-# Because of handling of submodules, we maintain a GitMirror
-# for the primary git source and also for each submodule it
-# might have at a given time
-#
-class GitMirror(SourceFetcher):
-
- def __init__(self, source, path, url, ref, *, primary=False, tags=[]):
-
- super().__init__()
- self.source = source
- self.path = path
- self.url = url
- self.ref = ref
- self.tags = tags
- self.primary = primary
- self.mirror = os.path.join(source.get_mirror_directory(), utils.url_directory_name(url))
- self.mark_download_url(url)
-
- # Ensures that the mirror exists
- def ensure(self, alias_override=None):
-
- # Unfortunately, git does not know how to only clone just a specific ref,
- # so we have to download all of those gigs even if we only need a couple
- # of bytes.
- if not os.path.exists(self.mirror):
-
- # Do the initial clone in a tmpdir just because we want an atomic move
- # after a long standing clone which could fail overtime, for now do
- # this directly in our git directory, eliminating the chances that the
- # system configured tmpdir is not on the same partition.
- #
- with self.source.tempdir() as tmpdir:
- url = self.source.translate_url(self.url, alias_override=alias_override,
- primary=self.primary)
- self.source.call([self.source.host_git, 'clone', '--mirror', '-n', url, tmpdir],
- fail="Failed to clone git repository {}".format(url),
- fail_temporarily=True)
-
- try:
- move_atomic(tmpdir, self.mirror)
- except DirectoryExistsError:
- # Another process was quicker to download this repository.
- # Let's discard our own
- self.source.status("{}: Discarding duplicate clone of {}"
- .format(self.source, url))
- except OSError as e:
- raise SourceError("{}: Failed to move cloned git repository {} from '{}' to '{}': {}"
- .format(self.source, url, tmpdir, self.mirror, e)) from e
-
- def _fetch(self, alias_override=None):
- url = self.source.translate_url(self.url,
- alias_override=alias_override,
- primary=self.primary)
-
- if alias_override:
- remote_name = utils.url_directory_name(alias_override)
- _, remotes = self.source.check_output(
- [self.source.host_git, 'remote'],
- fail="Failed to retrieve list of remotes in {}".format(self.mirror),
- cwd=self.mirror
- )
- if remote_name not in remotes:
- self.source.call(
- [self.source.host_git, 'remote', 'add', remote_name, url],
- fail="Failed to add remote {} with url {}".format(remote_name, url),
- cwd=self.mirror
- )
- else:
- remote_name = "origin"
-
- self.source.call([self.source.host_git, 'fetch', remote_name, '--prune', '--force', '--tags'],
- fail="Failed to fetch from remote git repository: {}".format(url),
- fail_temporarily=True,
- cwd=self.mirror)
-
- def fetch(self, alias_override=None):
- # Resolve the URL for the message
- resolved_url = self.source.translate_url(self.url,
- alias_override=alias_override,
- primary=self.primary)
-
- with self.source.timed_activity("Fetching from {}"
- .format(resolved_url),
- silent_nested=True):
- self.ensure(alias_override)
- if not self.has_ref():
- self._fetch(alias_override)
- self.assert_ref()
-
- def has_ref(self):
- if not self.ref:
- return False
-
- # If the mirror doesnt exist, we also dont have the ref
- if not os.path.exists(self.mirror):
- return False
-
- # Check if the ref is really there
- rc = self.source.call([self.source.host_git, 'cat-file', '-t', self.ref], cwd=self.mirror)
- return rc == 0
-
- def assert_ref(self):
- if not self.has_ref():
- raise SourceError("{}: expected ref '{}' was not found in git repository: '{}'"
- .format(self.source, self.ref, self.url))
-
- def latest_commit_with_tags(self, tracking, track_tags=False):
- _, output = self.source.check_output(
- [self.source.host_git, 'rev-parse', tracking],
- fail="Unable to find commit for specified branch name '{}'".format(tracking),
- cwd=self.mirror)
- ref = output.rstrip('\n')
-
- if self.source.ref_format == 'git-describe':
- # Prefix the ref with the closest tag, if available,
- # to make the ref human readable
- exit_code, output = self.source.check_output(
- [self.source.host_git, 'describe', '--tags', '--abbrev=40', '--long', ref],
- cwd=self.mirror)
- if exit_code == 0:
- ref = output.rstrip('\n')
-
- if not track_tags:
- return ref, []
-
- tags = set()
- for options in [[], ['--first-parent'], ['--tags'], ['--tags', '--first-parent']]:
- exit_code, output = self.source.check_output(
- [self.source.host_git, 'describe', '--abbrev=0', ref] + options,
- cwd=self.mirror)
- if exit_code == 0:
- tag = output.strip()
- _, commit_ref = self.source.check_output(
- [self.source.host_git, 'rev-parse', tag + '^{commit}'],
- fail="Unable to resolve tag '{}'".format(tag),
- cwd=self.mirror)
- exit_code = self.source.call(
- [self.source.host_git, 'cat-file', 'tag', tag],
- cwd=self.mirror)
- annotated = (exit_code == 0)
-
- tags.add((tag, commit_ref.strip(), annotated))
-
- return ref, list(tags)
-
- def stage(self, directory):
- fullpath = os.path.join(directory, self.path)
-
- # Using --shared here avoids copying the objects into the checkout, in any
- # case we're just checking out a specific commit and then removing the .git/
- # directory.
- self.source.call([self.source.host_git, 'clone', '--no-checkout', '--shared', self.mirror, fullpath],
- fail="Failed to create git mirror {} in directory: {}".format(self.mirror, fullpath),
- fail_temporarily=True)
-
- self.source.call([self.source.host_git, 'checkout', '--force', self.ref],
- fail="Failed to checkout git ref {}".format(self.ref),
- cwd=fullpath)
-
- # Remove .git dir
- shutil.rmtree(os.path.join(fullpath, ".git"))
-
- self._rebuild_git(fullpath)
-
- def init_workspace(self, directory):
- fullpath = os.path.join(directory, self.path)
- url = self.source.translate_url(self.url)
-
- self.source.call([self.source.host_git, 'clone', '--no-checkout', self.mirror, fullpath],
- fail="Failed to clone git mirror {} in directory: {}".format(self.mirror, fullpath),
- fail_temporarily=True)
-
- self.source.call([self.source.host_git, 'remote', 'set-url', 'origin', url],
- fail='Failed to add remote origin "{}"'.format(url),
- cwd=fullpath)
-
- self.source.call([self.source.host_git, 'checkout', '--force', self.ref],
- fail="Failed to checkout git ref {}".format(self.ref),
- cwd=fullpath)
-
- # List the submodules (path/url tuples) present at the given ref of this repo
- def submodule_list(self):
- modules = "{}:{}".format(self.ref, GIT_MODULES)
- exit_code, output = self.source.check_output(
- [self.source.host_git, 'show', modules], cwd=self.mirror)
-
- # If git show reports error code 128 here, we take it to mean there is
- # no .gitmodules file to display for the given revision.
- if exit_code == 128:
- return
- elif exit_code != 0:
- raise SourceError(
- "{plugin}: Failed to show gitmodules at ref {ref}".format(
- plugin=self, ref=self.ref))
-
- content = '\n'.join([l.strip() for l in output.splitlines()])
-
- io = StringIO(content)
- parser = RawConfigParser()
- parser.read_file(io)
-
- for section in parser.sections():
- # validate section name against the 'submodule "foo"' pattern
- if re.match(r'submodule "(.*)"', section):
- path = parser.get(section, 'path')
- url = parser.get(section, 'url')
-
- yield (path, url)
-
- # Fetch the ref which this mirror requires its submodule to have,
- # at the given ref of this mirror.
- def submodule_ref(self, submodule, ref=None):
- if not ref:
- ref = self.ref
-
- # list objects in the parent repo tree to find the commit
- # object that corresponds to the submodule
- _, output = self.source.check_output([self.source.host_git, 'ls-tree', ref, submodule],
- fail="ls-tree failed for commit {} and submodule: {}".format(
- ref, submodule),
- cwd=self.mirror)
-
- # read the commit hash from the output
- fields = output.split()
- if len(fields) >= 2 and fields[1] == 'commit':
- submodule_commit = output.split()[2]
-
- # fail if the commit hash is invalid
- if len(submodule_commit) != 40:
- raise SourceError("{}: Error reading commit information for submodule '{}'"
- .format(self.source, submodule))
-
- return submodule_commit
-
- else:
- detail = "The submodule '{}' is defined either in the BuildStream source\n".format(submodule) + \
- "definition, or in a .gitmodules file. But the submodule was never added to the\n" + \
- "underlying git repository with `git submodule add`."
-
- self.source.warn("{}: Ignoring inconsistent submodule '{}'"
- .format(self.source, submodule), detail=detail,
- warning_token=WARN_INCONSISTENT_SUBMODULE)
-
- return None
-
- def _rebuild_git(self, fullpath):
- if not self.tags:
- return
-
- with self.source.tempdir() as tmpdir:
- included = set()
- shallow = set()
- for _, commit_ref, _ in self.tags:
-
- _, out = self.source.check_output([self.source.host_git, 'rev-list',
- '--boundary', '{}..{}'.format(commit_ref, self.ref)],
- fail="Failed to get git history {}..{} in directory: {}"
- .format(commit_ref, self.ref, fullpath),
- fail_temporarily=True,
- cwd=self.mirror)
- for line in out.splitlines():
- rev = line.lstrip('-')
- if line[0] == '-':
- shallow.add(rev)
- else:
- included.add(rev)
-
- shallow -= included
- included |= shallow
-
- self.source.call([self.source.host_git, 'init'],
- fail="Cannot initialize git repository: {}".format(fullpath),
- cwd=fullpath)
-
- for rev in included:
- with TemporaryFile(dir=tmpdir) as commit_file:
- self.source.call([self.source.host_git, 'cat-file', 'commit', rev],
- stdout=commit_file,
- fail="Failed to get commit {}".format(rev),
- cwd=self.mirror)
- commit_file.seek(0, 0)
- self.source.call([self.source.host_git, 'hash-object', '-w', '-t', 'commit', '--stdin'],
- stdin=commit_file,
- fail="Failed to add commit object {}".format(rev),
- cwd=fullpath)
-
- with open(os.path.join(fullpath, '.git', 'shallow'), 'w') as shallow_file:
- for rev in shallow:
- shallow_file.write('{}\n'.format(rev))
-
- for tag, commit_ref, annotated in self.tags:
- if annotated:
- with TemporaryFile(dir=tmpdir) as tag_file:
- tag_data = 'object {}\ntype commit\ntag {}\n'.format(commit_ref, tag)
- tag_file.write(tag_data.encode('ascii'))
- tag_file.seek(0, 0)
- _, tag_ref = self.source.check_output(
- [self.source.host_git, 'hash-object', '-w', '-t',
- 'tag', '--stdin'],
- stdin=tag_file,
- fail="Failed to add tag object {}".format(tag),
- cwd=fullpath)
-
- self.source.call([self.source.host_git, 'tag', tag, tag_ref.strip()],
- fail="Failed to tag: {}".format(tag),
- cwd=fullpath)
- else:
- self.source.call([self.source.host_git, 'tag', tag, commit_ref],
- fail="Failed to tag: {}".format(tag),
- cwd=fullpath)
-
- with open(os.path.join(fullpath, '.git', 'HEAD'), 'w') as head:
- self.source.call([self.source.host_git, 'rev-parse', self.ref],
- stdout=head,
- fail="Failed to parse commit {}".format(self.ref),
- cwd=self.mirror)
-
-
-class GitSource(Source):
- # pylint: disable=attribute-defined-outside-init
-
- def configure(self, node):
- ref = self.node_get_member(node, str, 'ref', None)
-
- config_keys = ['url', 'track', 'ref', 'submodules',
- 'checkout-submodules', 'ref-format',
- 'track-tags', 'tags']
- self.node_validate(node, config_keys + Source.COMMON_CONFIG_KEYS)
-
- tags_node = self.node_get_member(node, list, 'tags', [])
- for tag_node in tags_node:
- self.node_validate(tag_node, ['tag', 'commit', 'annotated'])
-
- tags = self._load_tags(node)
- self.track_tags = self.node_get_member(node, bool, 'track-tags', False)
-
- self.original_url = self.node_get_member(node, str, 'url')
- self.mirror = GitMirror(self, '', self.original_url, ref, tags=tags, primary=True)
- self.tracking = self.node_get_member(node, str, 'track', None)
-
- self.ref_format = self.node_get_member(node, str, 'ref-format', 'sha1')
- if self.ref_format not in ['sha1', 'git-describe']:
- provenance = self.node_provenance(node, member_name='ref-format')
- raise SourceError("{}: Unexpected value for ref-format: {}".format(provenance, self.ref_format))
-
- # At this point we now know if the source has a ref and/or a track.
- # If it is missing both then we will be unable to track or build.
- if self.mirror.ref is None and self.tracking is None:
- raise SourceError("{}: Git sources require a ref and/or track".format(self),
- reason="missing-track-and-ref")
-
- self.checkout_submodules = self.node_get_member(node, bool, 'checkout-submodules', True)
- self.submodules = []
-
- # Parse a dict of submodule overrides, stored in the submodule_overrides
- # and submodule_checkout_overrides dictionaries.
- self.submodule_overrides = {}
- self.submodule_checkout_overrides = {}
- modules = self.node_get_member(node, Mapping, 'submodules', {})
- for path, _ in self.node_items(modules):
- submodule = self.node_get_member(modules, Mapping, path)
- url = self.node_get_member(submodule, str, 'url', None)
-
- # Make sure to mark all URLs that are specified in the configuration
- if url:
- self.mark_download_url(url, primary=False)
-
- self.submodule_overrides[path] = url
- if 'checkout' in submodule:
- checkout = self.node_get_member(submodule, bool, 'checkout')
- self.submodule_checkout_overrides[path] = checkout
-
- self.mark_download_url(self.original_url)
-
- def preflight(self):
- # Check if git is installed, get the binary at the same time
- self.host_git = utils.get_host_tool('git')
-
- def get_unique_key(self):
- # Here we want to encode the local name of the repository and
- # the ref, if the user changes the alias to fetch the same sources
- # from another location, it should not affect the cache key.
- key = [self.original_url, self.mirror.ref]
- if self.mirror.tags:
- tags = {tag: (commit, annotated) for tag, commit, annotated in self.mirror.tags}
- key.append({'tags': tags})
-
- # Only modify the cache key with checkout_submodules if it's something
- # other than the default behaviour.
- if self.checkout_submodules is False:
- key.append({"checkout_submodules": self.checkout_submodules})
-
- # We want the cache key to change if the source was
- # configured differently, and submodules count.
- if self.submodule_overrides:
- key.append(self.submodule_overrides)
-
- if self.submodule_checkout_overrides:
- key.append({"submodule_checkout_overrides": self.submodule_checkout_overrides})
-
- return key
-
- def get_consistency(self):
- if self.have_all_refs():
- return Consistency.CACHED
- elif self.mirror.ref is not None:
- return Consistency.RESOLVED
- return Consistency.INCONSISTENT
-
- def load_ref(self, node):
- self.mirror.ref = self.node_get_member(node, str, 'ref', None)
- self.mirror.tags = self._load_tags(node)
-
- def get_ref(self):
- return self.mirror.ref, self.mirror.tags
-
- def set_ref(self, ref_data, node):
- if not ref_data:
- self.mirror.ref = None
- if 'ref' in node:
- del node['ref']
- self.mirror.tags = []
- if 'tags' in node:
- del node['tags']
- else:
- ref, tags = ref_data
- node['ref'] = self.mirror.ref = ref
- self.mirror.tags = tags
- if tags:
- node['tags'] = []
- for tag, commit_ref, annotated in tags:
- data = {'tag': tag,
- 'commit': commit_ref,
- 'annotated': annotated}
- node['tags'].append(data)
- else:
- if 'tags' in node:
- del node['tags']
-
- def track(self):
-
- # If self.tracking is not specified it's not an error, just silently return
- if not self.tracking:
- # Is there a better way to check if a ref is given.
- if self.mirror.ref is None:
- detail = 'Without a tracking branch ref can not be updated. Please ' + \
- 'provide a ref or a track.'
- raise SourceError("{}: No track or ref".format(self),
- detail=detail, reason="track-attempt-no-track")
- return None
-
- # Resolve the URL for the message
- resolved_url = self.translate_url(self.mirror.url)
- with self.timed_activity("Tracking {} from {}"
- .format(self.tracking, resolved_url),
- silent_nested=True):
- self.mirror.ensure()
- self.mirror._fetch()
-
- # Update self.mirror.ref and node.ref from the self.tracking branch
- ret = self.mirror.latest_commit_with_tags(self.tracking, self.track_tags)
-
- return ret
-
- def init_workspace(self, directory):
- # XXX: may wish to refactor this as some code dupe with stage()
- self.refresh_submodules()
-
- with self.timed_activity('Setting up workspace "{}"'.format(directory), silent_nested=True):
- self.mirror.init_workspace(directory)
- for mirror in self.submodules:
- mirror.init_workspace(directory)
-
- def stage(self, directory):
-
- # Need to refresh submodule list here again, because
- # it's possible that we did not load in the main process
- # with submodules present (source needed fetching) and
- # we may not know about the submodule yet come time to build.
- #
- self.refresh_submodules()
-
- # Stage the main repo in the specified directory
- #
- with self.timed_activity("Staging {}".format(self.mirror.url), silent_nested=True):
- self.mirror.stage(directory)
- for mirror in self.submodules:
- mirror.stage(directory)
-
- def get_source_fetchers(self):
- yield self.mirror
- self.refresh_submodules()
- for submodule in self.submodules:
- yield submodule
-
- def validate_cache(self):
- discovered_submodules = {}
- unlisted_submodules = []
- invalid_submodules = []
-
- for path, url in self.mirror.submodule_list():
- discovered_submodules[path] = url
- if self.ignore_submodule(path):
- continue
-
- override_url = self.submodule_overrides.get(path)
- if not override_url:
- unlisted_submodules.append((path, url))
-
- # Warn about submodules which are explicitly configured but do not exist
- for path, url in self.submodule_overrides.items():
- if path not in discovered_submodules:
- invalid_submodules.append((path, url))
-
- if invalid_submodules:
- detail = []
- for path, url in invalid_submodules:
- detail.append(" Submodule URL '{}' at path '{}'".format(url, path))
-
- self.warn("{}: Invalid submodules specified".format(self),
- warning_token=WARN_INVALID_SUBMODULE,
- detail="The following submodules are specified in the source "
- "description but do not exist according to the repository\n\n" +
- "\n".join(detail))
-
- # Warn about submodules which exist but have not been explicitly configured
- if unlisted_submodules:
- detail = []
- for path, url in unlisted_submodules:
- detail.append(" Submodule URL '{}' at path '{}'".format(url, path))
-
- self.warn("{}: Unlisted submodules exist".format(self),
- warning_token=WARN_UNLISTED_SUBMODULE,
- detail="The following submodules exist but are not specified " +
- "in the source description\n\n" +
- "\n".join(detail))
-
- # Assert that the ref exists in the track tag/branch, if track has been specified.
- ref_in_track = False
- if self.tracking:
- _, branch = self.check_output([self.host_git, 'branch', '--list', self.tracking,
- '--contains', self.mirror.ref],
- cwd=self.mirror.mirror)
- if branch:
- ref_in_track = True
- else:
- _, tag = self.check_output([self.host_git, 'tag', '--list', self.tracking,
- '--contains', self.mirror.ref],
- cwd=self.mirror.mirror)
- if tag:
- ref_in_track = True
-
- if not ref_in_track:
- detail = "The ref provided for the element does not exist locally " + \
- "in the provided track branch / tag '{}'.\n".format(self.tracking) + \
- "You may wish to track the element to update the ref from '{}' ".format(self.tracking) + \
- "with `bst track`,\n" + \
- "or examine the upstream at '{}' for the specific ref.".format(self.mirror.url)
-
- self.warn("{}: expected ref '{}' was not found in given track '{}' for staged repository: '{}'\n"
- .format(self, self.mirror.ref, self.tracking, self.mirror.url),
- detail=detail, warning_token=CoreWarnings.REF_NOT_IN_TRACK)
-
- ###########################################################
- # Local Functions #
- ###########################################################
- def have_all_refs(self):
- if not self.mirror.has_ref():
- return False
-
- self.refresh_submodules()
- for mirror in self.submodules:
- if not os.path.exists(mirror.mirror):
- return False
- if not mirror.has_ref():
- return False
-
- return True
-
- # Refreshes the GitMirror objects for submodules
- #
- # Assumes that we have our mirror and we have the ref which we point to
- #
- def refresh_submodules(self):
- self.mirror.ensure()
- submodules = []
-
- for path, url in self.mirror.submodule_list():
-
- # Completely ignore submodules which are disabled for checkout
- if self.ignore_submodule(path):
- continue
-
- # Allow configuration to override the upstream
- # location of the submodules.
- override_url = self.submodule_overrides.get(path)
- if override_url:
- url = override_url
-
- ref = self.mirror.submodule_ref(path)
- if ref is not None:
- mirror = GitMirror(self, path, url, ref)
- submodules.append(mirror)
-
- self.submodules = submodules
-
- def _load_tags(self, node):
- tags = []
- tags_node = self.node_get_member(node, list, 'tags', [])
- for tag_node in tags_node:
- tag = self.node_get_member(tag_node, str, 'tag')
- commit_ref = self.node_get_member(tag_node, str, 'commit')
- annotated = self.node_get_member(tag_node, bool, 'annotated')
- tags.append((tag, commit_ref, annotated))
- return tags
-
- # Checks whether the plugin configuration has explicitly
- # configured this submodule to be ignored
- def ignore_submodule(self, path):
- try:
- checkout = self.submodule_checkout_overrides[path]
- except KeyError:
- checkout = self.checkout_submodules
-
- return not checkout
+class GitSource(_GitSourceBase):
+ pass
# Plugin entry point