From 9da47db4e11aa613a85233ae7519d94eeea48c14 Mon Sep 17 00:00:00 2001 From: Tristan van Berkom Date: Fri, 12 Jun 2020 15:04:27 +0900 Subject: _loader: Add support for duplicate junctions This adds some measure of tolerance to duplicate projects by accepting cases where multiple projects are loaded but whitelisted to be loaded as duplicates by the project.conf. This change does the following: * _project.py: Add `junction_is_duplicated()` API and parse `duplicates` The project now maintains a table of junctions which are considered "duplicated" per project, and exposes an API for the loader to query whether a given loader is explicitly listed as a duplicate (allowing multiple "duplicated" instances of that project to coexist in the same pipeline). This new API searches for the passed Loader instance by way of it's project relative search paths specified in the duplicates. Using the `Loader.get_loader()` codepath ensures that any links get resolved and that any overrides along the way will be considered correctly while attempting to match the duplicate. * _loader/loader.py: Few changes here. - Allow Loader.get_loader() to operate in a mode which does not load subprojects. - Add new function Loader.ancestors(), this is a generator which yields every ancestor loader which is capable of addressing the given loader, including ancestors for which their junction definitions to this loader have been overridden. To this end, we now record `_alternative_parents` when searching for overrides. - Now implements __str__() in order to print an identifying string for a loader in error messages * _loader/loadcontext.py: Enhanced to now ask projects whether they are marked as duplicates in order to avoid erroring out on project collisions if those are explicitly marked to not be an error. We now also split out the new function LoadContext.assert_loaders(), which is to be called once at the end of loading the whole pipeline recursively. This reduces the complexity of searching the loaded graph while loading the graph, and also reports more complete errors (all instances of a conflicting project will be reported in the error at once). --- src/buildstream/_loader/loadcontext.py | 164 +++++++++++++++++++++++++++------ src/buildstream/_loader/loader.py | 125 ++++++++++++++++++++++--- src/buildstream/_project.py | 55 +++++++++++ 3 files changed, 299 insertions(+), 45 deletions(-) diff --git a/src/buildstream/_loader/loadcontext.py b/src/buildstream/_loader/loadcontext.py index 65b7e9157..c6d256050 100644 --- a/src/buildstream/_loader/loadcontext.py +++ b/src/buildstream/_loader/loadcontext.py @@ -21,6 +21,118 @@ from .._exceptions import LoadError from ..exceptions import LoadErrorReason +# ProjectLoaders() +# +# An object representing all of the loaders for a given project. +# +class ProjectLoaders: + def __init__(self, project_name): + + # The project name + self._name = project_name + + # A list of all loaded loaders for this project + self._collect = [] + + # register_loader() + # + # Register a Loader for this project + # + # Args: + # loader (Loader): The loader to register + # + def register_loader(self, loader): + assert loader.project.name == self._name + + self._collect.append(loader) + + # assert_loaders(): + # + # Asserts the validity of loaders for this project + # + # Raises: + # (LoadError): In case there is a CONFLICTING_JUNCTION error + # + def assert_loaders(self): + duplicates = {} + primary = [] + + for loader in self._collect: + duplicating = self._search_duplicates(loader) + if duplicating: + duplicates[loader] = duplicating + else: + primary.append(loader) + + if len(primary) > 1: + self._raise_conflict(duplicates) + + elif primary and duplicates: + self._raise_conflict(duplicates) + + # _search_duplicates() + # + # Searches this loader's ancestry for projects which mark this + # loader as a duplicate. + # + # Args: + # loader (Loader): The loader to search for duplicate markers of + # + # Returns: + # (list): A list of Loader objects who's project has marked + # this junction as a duplicate + # + def _search_duplicates(self, loader): + duplicates = [] + for parent in loader.ancestors(): + if parent.project.junction_is_duplicated(self._name, loader): + duplicates.append(parent) + return duplicates + + # _raise_conflict() + # + # Raises the LoadError indicating there was a conflict, this + # will list all of the instances in which the project has + # been loaded as the LoadError detail string + # + # Args: + # duplicates (dict): A table of duplicating Loaders, indexed + # by duplicated Loader + # + # Raises: + # (LoadError): In case there is a CONFLICTING_JUNCTION error + # + def _raise_conflict(self, duplicates): + lines = [self._loader_description(loader, duplicates) for loader in self._collect] + raise LoadError( + "Project '{}' was loaded in multiple contexts".format(self._name), + LoadErrorReason.CONFLICTING_JUNCTION, + detail="\n".join(lines), + ) + + # _loader_description() + # + # Args: + # loader (Loader): The loader to describe + # duplicates (dict): A table of duplicating Loaders, indexed + # by duplicated Loader + # + # Returns: + # (str): A string representing how this loader was loaded + # + def _loader_description(self, loader, duplicates): + + line = "{}\n".format(loader) + + # Mention projects which have marked this project as a duplicate + duplicating = duplicates.get(loader) + if duplicating: + for dup in duplicating: + line += " Duplicated by: {}\n".format(dup) + + return line + + # LoaderContext() # # An object to keep track of overall context during the load process. @@ -71,44 +183,36 @@ class LoadContext: def set_fetch_subprojects(self, fetch_subprojects): self.fetch_subprojects = fetch_subprojects + # assert_loaders() + # + # Asserts that there are no conflicting projects loaded. + # + # Raises: + # (LoadError): A CONFLICTING_JUNCTION LoadError in the case of a conflict + # + def assert_loaders(self): + for _, loaders in self._loaders.items(): + loaders.assert_loaders() + # register_loader() # # Registers a new loader in the load context, possibly - # raising an error in the case of a conflict + # raising an error in the case of a conflict. + # + # This must be called after a recursive load process has completed, + # and after the pipeline is resolved (which is to say that all related + # Plugin derived objects have been instantiated). # # Args: # loader (Loader): The Loader object to register into context # - # Raises: - # (LoadError): A CONFLICTING_JUNCTION LoadError in the case of a conflict - # def register_loader(self, loader): project = loader.project - existing_loader = self._loaders.get(project.name, None) - if existing_loader: + try: + project_loaders = self._loaders[project.name] + except KeyError: + project_loaders = ProjectLoaders(project.name) + self._loaders[project.name] = project_loaders - assert project.junction is not None - - if existing_loader.project.junction: - # The existing provenance can be None even if there is a junction, this - # can happen when specifying a full element path on the command line. - # - provenance_str = "" - if existing_loader.provenance: - provenance_str = ": {}".format(existing_loader.provenance) - - detail = "Project '{}' was already loaded by junction '{}'{}".format( - project.name, existing_loader.project.junction._get_full_name(), provenance_str - ) - else: - detail = "Project '{}' is also the toplevel project".format(project.name) - - raise LoadError( - "{}: Error loading project '{}' from junction: {}".format( - loader.provenance, project.name, project.junction._get_full_name() - ), - LoadErrorReason.CONFLICTING_JUNCTION, - detail=detail, - ) - self._loaders[project.name] = loader + project_loaders.register_loader(loader) diff --git a/src/buildstream/_loader/loader.py b/src/buildstream/_loader/loader.py index 76d5eef82..81e5fb032 100644 --- a/src/buildstream/_loader/loader.py +++ b/src/buildstream/_loader/loader.py @@ -18,6 +18,7 @@ # Tristan Van Berkom import os +from contextlib import suppress from .._exceptions import LoadError from ..exceptions import LoadErrorReason @@ -71,6 +72,7 @@ class Loader: self._basedir = basedir # Base project directory self._first_pass_options = project.first_pass_config.options # Project options (OptionPool) self._parent = parent # The parent loader + self._alternative_parents = [] # Overridden parent loaders self._meta_elements = {} # Dict of resolved meta elements by name self._elements = {} # Dict of elements @@ -83,6 +85,24 @@ class Loader: self.load_context.register_loader(self) + # The __str__ of a Loader is used to clearly identify the Loader, + # the junction is was loaded as, and the provenance causing the + # junction to be loaded. + # + def __str__(self): + project_name = self.project.name + + if self.project.junction: + junction_name = self.project.junction._get_full_name() + if self.provenance: + provenance = "({}): {}".format(junction_name, self.provenance) + else: + provenance = "({})".format(junction_name) + else: + provenance = "(toplevel)" + + return "{} {}".format(project_name, provenance) + # load(): # # Loads the project based on the parameters given to the constructor @@ -163,19 +183,29 @@ class Loader: # # Obtains the appropriate loader for the specified junction # + # If `load_subprojects` is enabled, then this function will + # either return the desired loader or raise a LoadError. If + # `load_subprojects` is disabled, then it can also return None + # in the case that a loader could not be found. In either case, + # a non-existant file in a loaded project will result in a LoadError. + # # Args: # name (str): Name of junction, may have multiple `:` in the name # provenance (ProvenanceInformation): The provenance + # load_subprojects (bool): Whether to load subprojects on demand # # Returns: # (Loader): loader for sub-project # - def get_loader(self, name, provenance): + def get_loader(self, name, provenance, *, load_subprojects=True): junction_path = name.split(":") loader = self circular_provenance = self._loader_search_provenances.get(name, None) if circular_provenance: + + assert provenance + detail = None if circular_provenance is not provenance: detail = "Already searching for '{}' at: {}".format(name, circular_provenance) @@ -188,12 +218,39 @@ class Loader: self._loader_search_provenances[name] = provenance for junction_name in junction_path: - loader = loader._get_loader(junction_name, provenance) + loader = loader._get_loader(junction_name, provenance, load_subprojects=load_subprojects) del self._loader_search_provenances[name] return loader + # ancestors() + # + # This will traverse all active loaders in the ancestry for which this + # project is reachable using a relative path. + # + # Yields: + # (Loader): Each loader in the ancestry + # + def ancestors(self): + traversed = {} + + def foreach_parent(parent): + while parent: + if parent in traversed: + return + traversed[parent] = True + yield parent + parent = parent._parent + + # Yield from the direct/active ancestry + yield from foreach_parent(self._parent) + + # Yield from alternative parents which have been replaced by + # overrides in the ancestry. + for parent in self._alternative_parents: + yield from foreach_parent(parent) + # collect_element_no_deps() # # Collect a single element, without its dependencies, into a meta_element @@ -360,15 +417,16 @@ class Loader: # # Args: # filename (str): The element-path relative bst file + # load_subprojects (bool): Whether to load subprojects # provenance (Provenance): The location from where the file was referred to, or None # # Returns: # (LoadElement): A loaded LoadElement # - def _load_file(self, filename, provenance): + def _load_file(self, filename, provenance, *, load_subprojects=True): # Silently ignore already loaded files - if filename in self._elements: + with suppress(KeyError): return self._elements[filename] top_element = self._load_file_no_deps(filename, provenance) @@ -376,8 +434,12 @@ class Loader: # If this element is a link then we need to resolve it # and replace the dependency we've processed with this one if top_element.link_target is not None: - _, filename, loader = self._parse_name(top_element.link_target, top_element.link_target_provenance) - top_element = loader._load_file(filename, top_element.link_target_provenance) + _, filename, loader = self._parse_name( + top_element.link_target, top_element.link_target_provenance, load_subprojects=load_subprojects + ) + top_element = loader._load_file( + filename, top_element.link_target_provenance, load_subprojects=load_subprojects + ) dependencies = extract_depends_from_node(top_element.node) # The loader queue is a stack of tuples @@ -540,6 +602,11 @@ class Loader: # # Search parent projects for an overridden subproject to replace this junction. # + # This function is called once for each direct child while looking up + # child loaders, after which point the child loader is cached in the `_loaders` + # table. This function also has the side effect of recording alternative parents + # of a child loader in the case that the child loader is overridden. + # # Args: # filename (str): Junction name # @@ -554,7 +621,7 @@ class Loader: junction = loader.project.junction override_filename, override_provenance = junction.overrides.get(override_path, (None, None)) if override_filename: - overriding_loaders.append((loader, override_filename, override_provenance)) + overriding_loaders.append((loader._parent, override_filename, override_provenance)) override_path = junction.name + ":" + override_path loader = loader._parent @@ -563,8 +630,27 @@ class Loader: # the ancestry to lookup the loader for this project. # if overriding_loaders: - loader, filename, provenance = overriding_loaders[-1] - return loader._parent.get_loader(filename, provenance) + overriding_loader, override_filename, provenance = overriding_loaders[-1] + loader = overriding_loader.get_loader(override_filename, provenance) + + # + # Record alternative loaders which were overridden. + # + # When a junction is overridden by another higher priority junction, + # the resulting loader is still reachable with the original element paths, + # which will now traverse override redirections. + # + # In order to iterate over every project/loader in the ancestry which can + # reach the actually selected loader, we need to keep track of the parent + # loaders of all overridden junctions. + # + if loader is not self: + loader._alternative_parents.append(self) + + del overriding_loaders[-1] + loader._alternative_parents.extend(l for l, _, _ in overriding_loaders) + + return loader # No overrides were found in the ancestry # @@ -576,13 +662,14 @@ class Loader: # # Args: # filename (str): Junction name + # load_subprojects (bool): Whether to load subprojects # provenance (Provenance): The location from where the file was referred to, or None # # Raises: LoadError # # Returns: A Loader or None if specified junction does not exist # - def _get_loader(self, filename, provenance): + def _get_loader(self, filename, provenance, *, load_subprojects=True): loader = None provenance_str = "" if provenance is not None: @@ -604,7 +691,7 @@ class Loader: # # Load the junction file # - self._load_file(filename, provenance) + self._load_file(filename, provenance, load_subprojects=load_subprojects) # At this point we've loaded the LoadElement load_element = self._elements[filename] @@ -613,8 +700,15 @@ class Loader: # immediately and move on to the target. # if load_element.link_target: - _, filename, loader = self._parse_name(load_element.link_target, load_element.link_target_provenance) - return loader.get_loader(filename, load_element.link_target_provenance) + _, filename, loader = self._parse_name( + load_element.link_target, load_element.link_target_provenance, load_subprojects=load_subprojects + ) + return loader.get_loader(filename, load_element.link_target_provenance, load_subprojects=load_subprojects) + + # If we're only performing a lookup, we're done here. + # + if not load_subprojects: + return None # meta junction element # @@ -734,13 +828,14 @@ class Loader: # Args: # name (str): Name of target # provenance (ProvenanceInformation): The provenance + # load_subprojects (bool): Whether to load subprojects # # Returns: # (tuple): - (str): name of the junction element # - (str): name of the element # - (Loader): loader for sub-project # - def _parse_name(self, name, provenance): + def _parse_name(self, name, provenance, *, load_subprojects=True): # We allow to split only once since deep junctions names are forbidden. # Users who want to refer to elements in sub-sub-projects are required # to create junctions on the top level project. @@ -748,7 +843,7 @@ class Loader: if len(junction_path) == 1: return None, junction_path[-1], self else: - loader = self.get_loader(junction_path[-2], provenance) + loader = self.get_loader(junction_path[-2], provenance, load_subprojects=load_subprojects) return junction_path[-2], junction_path[-1], loader # Print a warning message, checks warning_token against project configuration diff --git a/src/buildstream/_project.py b/src/buildstream/_project.py index fcfb31988..3a8998abb 100644 --- a/src/buildstream/_project.py +++ b/src/buildstream/_project.py @@ -158,6 +158,11 @@ class Project: self._sandbox = None self._splits = None + # This is a lookup table of dictionaries indexed by project, + # the child dictionaries are junction names as keys with their + # provenances as values + self._junction_duplicates = {} + self._context.add_project(self) self._partially_loaded = False @@ -434,6 +439,12 @@ class Project: Element._clear_meta_elements_cache() + # Assert loaders after resolving everything, this is because plugin + # loading (across junction boundaries) can also be the cause of + # conflicting projects. + # + self.load_context.assert_loaders() + # Now warn about any redundant source references which may have # been discovered in the resolve() phase. redundant_refs = Element._get_redundant_source_refs() @@ -541,6 +552,37 @@ class Project: return tuple(default_targets) + # junction_is_duplicated() + # + # Check whether this loader is specified as a duplicate by + # this project. + # + # Args: + # project_name: (str): The project name + # loader (Loader): The loader to check for + # + # Returns: + # (bool): Whether the loader is specified as duplicate + # + def junction_is_duplicated(self, project_name, loader): + + junction_dict = self._junction_duplicates.get(project_name, {}) + + # Iterate over all paths specified by this project and see + # if we find a match for the specified loader. + # + # Using the regular `Loader.get_loader()` codepath from this + # project ensures that we will find the correct loader relative + # to this project, regardless of any overrides or link elements + # which might have been used in the project. + # + for dup_path, dup_provenance in junction_dict.items(): + search = self.loader.get_loader(dup_path, dup_provenance, load_subprojects=False) + if loader is search: + return True + + return False + ######################################################## # Private Methods # ######################################################## @@ -577,6 +619,7 @@ class Project: "remote-execution", "sources", "source-caches", + "junctions", "(@)", ] ) @@ -701,6 +744,18 @@ class Project: # Fatal warnings self._fatal_warnings = pre_config_node.get_str_list("fatal-warnings", default=[]) + # Junction configuration + junctions_node = pre_config_node.get_mapping("junctions", default={}) + junctions_node.validate_keys(["duplicates"]) + junction_duplicates = junctions_node.get_mapping("duplicates", default={}) + for project_name, junctions in junction_duplicates.items(): + # For each junction we preserve the provenance and the junction string, + # the provenance is used for lookups later on. + # + self._junction_duplicates[project_name] = junctions_dict = {} + for junction_node in junctions: + junctions_dict[junction_node.as_str()] = junction_node.get_provenance() + self.loader = Loader(self, parent=parent_loader, provenance=provenance) self._project_includes = Includes(self.loader, copy_tree=False) -- cgit v1.2.1