diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2020-07-23 03:32:25 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2020-07-23 03:32:25 +0000 |
commit | f5d6bae43879c58cfb9b42cf047e794277e10fb6 (patch) | |
tree | 1be77bb7bce8ed846fc1cce3d96332ec86f357e7 | |
parent | 64d257e6e34382f4e90b834a05da36f31e1167f9 (diff) | |
parent | 59e6e2a3c1973c5efe09035c925c884b226192b3 (diff) | |
download | buildstream-f5d6bae43879c58cfb9b42cf047e794277e10fb6.tar.gz |
Merge branch 'tristan/partial-variables' into 'master'
Variables non-recursive and lazy resolution refactor
See merge request BuildStream/buildstream!1973
-rw-r--r-- | src/buildstream/_variables.pyx | 776 | ||||
-rw-r--r-- | src/buildstream/element.py | 6 | ||||
-rw-r--r-- | src/buildstream/exceptions.py | 4 | ||||
-rw-r--r-- | tests/format/variables.py | 99 | ||||
-rw-r--r-- | tests/format/variables/cyclic_variables/cyclic.bst | 2 | ||||
-rw-r--r-- | tests/format/variables/cyclic_variables/indirect-cyclic.bst | 8 | ||||
-rw-r--r-- | tests/format/variables/cyclic_variables/self-reference.bst | 4 | ||||
-rw-r--r-- | tests/format/variables/missing_variables/manual3.bst | 10 | ||||
-rw-r--r-- | tests/format/variables/partial_context/base.bst | 4 | ||||
-rw-r--r-- | tests/format/variables/partial_context/base/project.conf | 3 | ||||
-rw-r--r-- | tests/format/variables/partial_context/base/vars.yml | 2 | ||||
-rw-r--r-- | tests/format/variables/partial_context/project.conf | 8 | ||||
-rw-r--r-- | tests/format/variables/partial_context/test.bst | 3 | ||||
-rw-r--r-- | tests/frontend/overlaps.py | 15 | ||||
-rw-r--r-- | tests/frontend/overlaps/whitelist-undefined.bst | 13 |
15 files changed, 755 insertions, 202 deletions
diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx index e3fcfc3a0..78f4dcecb 100644 --- a/src/buildstream/_variables.pyx +++ b/src/buildstream/_variables.pyx @@ -1,5 +1,5 @@ # -# Copyright (C) 2016 Codethink Limited +# Copyright (C) 2020 Codethink Limited # Copyright (C) 2019 Bloomberg L.P. # # This program is free software; you can redistribute it and/or @@ -22,42 +22,94 @@ import re import sys +import itertools from ._exceptions import LoadError from .exceptions import LoadErrorReason -from .node cimport MappingNode, Node, ScalarNode, SequenceNode +from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation -# Variables are allowed to have dashes here +######################################################## +# Understanding Value Expressions # +######################################################## +# +# This code uses the term "value expression" a lot to refer to `str` objects +# which have references to variables in them, and also to `list` objects which +# are effectively broken down strings. +# +# Ideally we would have a ValueExpression type in order to make this more +# comprehensive, but this would unfortunately introduce unnecessary overhead, +# making the code measurably slower. +# +# Value Expression Strings +# ------------------------ +# Strings which contain variables in them, such as: +# +# "My name is %{username}, good day." +# +# +# Parsed Value Expression Lists +# ----------------------------- +# Using `re.split()` from python's regular expression implementation, we +# parse the list using our locally defined VALUE_EXPRESSION_REGEX, which +# breaks down the string into a list of "literal" and "variable" components. +# +# The "literal" components are literal portions of the string which need +# no substitution, while the "variable" components represent variable names +# which need to be substituted with their corresponding resolved values. +# +# The parsed variable expressions have the following properties: +# +# * They are sparse, some of the "literal" values contain zero length +# strings which can be ignored. +# +# * Literal values are found only at even indices of the parsed +# variable expression +# +# * Variable names are found only at odd indices +# +# The above example "My name is %{username}, good day." is broken down +# into a parsed value expression as follows: +# +# [ +# "My name is ", # <- Index 0, literal value +# "username", # <- Index 1, variable name, '%{ ... }' discarded +# ", good day." # <- Index 2, literal value +# ] # -PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}") +# Maximum recursion depth using the fast (recursive) variable resolution +# algorithm. +# +cdef Py_ssize_t MAX_RECURSION_DEPTH = 200 -# Throughout this code you will see variables named things like `expstr`. -# These hold data structures called "expansion strings" and are the parsed -# form of the strings which are the input to this subsystem. Strings -# such as "Hello %{name}, how are you?" are parsed into the form: -# ["Hello ", "name", ", how are you?"] -# i.e. a list which consists of one or more strings. -# Strings in even indices of the list (0, 2, 4, etc) are constants which -# are copied into the output of the expansion algorithm. Strings in the -# odd indices (1, 3, 5, etc) are the names of further expansions to make. -# In the example above, first "Hello " is copied, then "name" is expanded -# and so must be another named expansion string passed in to the constructor -# of the Variables class, and whatever is yielded from the expansion of "name" -# is added to the concatenation for the result. Finally ", how are you?" is -# copied in and the whole lot concatenated for return. +# Regular expression used to parse %{variables} in value expressions # -# To see how strings are parsed, see `_parse_expstr()` after the class, and -# to see how expansion strings are expanded, see `_expand_expstr()` after that. +# Note that variables are allowed to have dashes +# +VALUE_EXPRESSION_REGEX = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}") + +# Cache for the parsed expansion strings. +# +cdef dict VALUE_EXPRESSION_CACHE = { + # Prime the cache with the empty string since otherwise that can + # cause issues with the parser, complications to which cause slowdown + "": [""], +} -# The Variables helper object will resolve the variable references in -# the given dictionary, expecting that any dictionary values which contain -# variable references can be resolved from the same dictionary. +# Variables() +# +# The Variables object resolves the variable references in the given MappingNode, +# expecting that any dictionary values which contain variable references can be +# resolved from the same dictionary. # # Each Element creates its own Variables instance to track the configured # variable settings for the element. # +# Notably, this object is delegated the responsibility of expanding +# variables in yaml Node hierarchies and substituting variables in strings +# in the context of a given Element's variable configuration. +# # Args: # node (Node): A node loaded and composited with yaml tools # @@ -66,20 +118,61 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}") # cdef class Variables: - cdef MappingNode original - cdef dict _expstr_map + cdef MappingNode _original + cdef dict _values + ################################################################# + # Dunder Methods # + ################################################################# def __init__(self, MappingNode node): - self.original = node - self._expstr_map = self._resolve(node) - self._check_for_missing() - self._check_for_cycles() + # The original MappingNode, we need to keep this + # around for proper error reporting. + # + self._original = node + + # The value map, this dictionary contains either unresolved + # value expressions, or resolved values. + # + # Each mapping value is a list, in the case that the value + # is resolved, then the list is only 1 element long. + # + self._values = self._init_values(node) + + # __getitem__() + # + # Fetches a resolved variable by it's name, allows + # addressing the Variables instance like a dictionary. + # + # Args: + # name (str): The name of the variable + # + # Returns: + # (str): The resolved variable value + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # def __getitem__(self, str name): - return _expand_var(self._expstr_map, name) + if name not in self._values: + raise KeyError(name) + + return self._expand_var(name) + # __contains__() + # + # Checks whether a given variable exists, allows + # supporting `if 'foo' in variables` expressions. + # + # Args: + # name (str): The name of the variable to check for + # + # Returns: + # (bool): True if `name` is a valid variable + # def __contains__(self, str name): - return name in self._expstr_map + return name in self._values # __iter__() # @@ -89,7 +182,28 @@ cdef class Variables: # (Iterator[Tuple[str, str]]) # def __iter__(self): - return _VariablesIterator(self._expstr_map) + return _VariablesIterator(self) + + ################################################################# + # Public API # + ################################################################# + + # check() + # + # Assert that all variables declared on this Variables + # instance have been resolved properly, and reports errors + # for undefined references and circular references. + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + cpdef check(self): + cdef object key + + # Just resolve all variables. + for key in self._values.keys(): + self._expand_var(<str> key) # get() # @@ -97,15 +211,15 @@ cdef class Variables: # defined, it will return None instead of failing. # # Args: - # name (str): Name of the variable to expand + # name (str): Name of the variable to expand # # Returns: - # (str|None): The expanded value for the variable or None variable was not defined. + # (str|None): The expanded value for the variable or None variable was not defined. # cpdef str get(self, str name): - if name not in self._expstr_map: + if name not in self._values: return None - return _expand_var(self._expstr_map, name) + return self[name] # expand() # @@ -114,11 +228,15 @@ cdef class Variables: # the node untouched, you should use `node.clone()` beforehand # # Args: - # (Node): A node for which to substitute the values + # (Node): A node for which to substitute the values + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference # cpdef expand(self, Node node): if isinstance(node, ScalarNode): - (<ScalarNode> node).value = self.subst((<ScalarNode> node).value) + (<ScalarNode> node).value = self.subst(<ScalarNode> node) elif isinstance(node, SequenceNode): for entry in (<SequenceNode> node).value: self.expand(entry) @@ -133,43 +251,40 @@ cdef class Variables: # Substitutes any variables in 'string' and returns the result. # # Args: - # (string): The string to substitute + # node (ScalarNode): The ScalarNode to substitute variables in # # Returns: - # (string): The new string with any substitutions made + # (str): The new string with any substitutions made # # Raises: - # LoadError, if the string contains unresolved variable references. + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference # - cpdef subst(self, str string): - expstr = _parse_expstr(string) + cpdef str subst(self, ScalarNode node): + value_expression = _parse_value_expression(node.as_str()) + return self._expand_value_expression(value_expression, node) - try: - return _expand_expstr(self._expstr_map, expstr) - except KeyError: - unmatched = [] - - # Look for any unmatched variable names in the expansion string - for var in expstr[1::2]: - if var not in self._expstr_map: - unmatched.append(var) - - if unmatched: - message = "Unresolved variable{}: {}".format( - "s" if len(unmatched) > 1 else "", - ", ".join(unmatched) - ) - - raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE) - # Otherwise, re-raise the KeyError since it clearly came from some - # other unknowable cause. - raise - - # Variable resolving code - # - # Here we resolve all of our inputs into a dictionary, ready for use - # in subst() - cdef dict _resolve(self, MappingNode node): + ################################################################# + # Private API # + ################################################################# + + # _init_values() + # + # Initialize the table of values. + # + # The value table is a dictionary keyed by the variable names where + # the values are value expressions (lists) which are initially unresolved. + # + # Value expressions are later resolved on demand and replaced in this + # table with single element lists. + # + # Args: + # node (MappingNode): The original variables mapping node + # + # Returns: + # (dict): A dictionary of value expressions (lists) + # + cdef dict _init_values(self, MappingNode node): # Special case, if notparallel is specified in the variables for this # element, then override max-jobs to be 1. # Initialize it as a string as all variables are processed as strings. @@ -178,158 +293,451 @@ cdef class Variables: node['max-jobs'] = str(1) cdef dict ret = {} + cdef object key_object cdef str key cdef str value - for key in node.keys(): + for key_object in node.keys(): + key = <str> key_object value = node.get_str(key) - ret[sys.intern(key)] = _parse_expstr(value) + ret[sys.intern(key)] = _parse_value_expression(value) + return ret - def _check_for_missing(self): - # First the check for anything unresolvable - summary = [] - for key, expstr in self._expstr_map.items(): - for var in expstr[1::2]: - if var not in self._expstr_map: - line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}" - provenance = self.original.get_scalar(key).get_provenance() - summary.append(line.format(unmatched=var, variable=key, provenance=provenance)) - if summary: - raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)), - LoadErrorReason.UNRESOLVED_VARIABLE) - - def _check_for_cycles(self): - # And now the cycle checks - def cycle_check(expstr, visited, cleared): - for var in expstr[1::2]: - if var in cleared: + # _expand_var() + # + # Expand and cache a variable definition. + # + # This will try the fast, recursive path first and fallback to + # the slower iterative codepath. + # + # Args: + # name (str): Name of the variable to expand + # + # Returns: + # (str): The expanded value of variable + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + cdef str _expand_var(self, str name): + try: + return self._fast_expand_var(name) + except (KeyError, RecursionError): + return self._slow_expand_var(name) + + # _expand_value_expression() + # + # Expands a value expression + # + # This will try the fast, recursive path first and fallback to + # the slower iterative codepath. + # + # Args: + # value_expression (list): The parsed value expression to be expanded + # node (ScalarNode): The toplevel ScalarNode who is asking for an expansion + # + # Returns: + # (str): The expanded value expression + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + cdef str _expand_value_expression(self, list value_expression, ScalarNode node): + try: + return self._fast_expand_value_expression(value_expression) + except (KeyError, RecursionError): + return self._slow_expand_value_expression(None, value_expression, node) + + ################################################################# + # Resolution algorithm: fast path # + ################################################################# + + # _fast_expand_var() + # + # Fast, recursive path for variable expansion + # + # Args: + # name (str): Name of the variable to expand + # counter (int): Number of recursion cycles (used only in recursion) + # + # Returns: + # (str): The expanded value of variable + # + # Raises: + # (KeyError): If a reference to an undefined variable is encountered + # (RecursionError): If MAX_RECURSION_DEPTH recursion cycles is reached + # + cdef str _fast_expand_var(self, str name, int counter = 0): + cdef str sub + cdef list value_expression + + value_expression = <list> self._values[name] + if len(value_expression) > 1: + sub = self._fast_expand_value_expression(value_expression, counter) + value_expression = [sys.intern(sub)] + self._values[name] = value_expression + + return <str> value_expression[0] + + # _fast_expand_value_expression() + # + # Fast, recursive path for value expression expansion. + # + # Args: + # value_expression (list): The parsed value expression to be expanded + # counter (int): Number of recursion cycles (used only in recursion) + # + # Returns: + # (str): The expanded value expression + # + # Raises: + # (KeyError): If a reference to an undefined variable is encountered + # (RecursionError): If MAX_RECURSION_DEPTH recursion cycles is reached + # + cdef str _fast_expand_value_expression(self, list value_expression, int counter = 0): + if counter > MAX_RECURSION_DEPTH: + raise RecursionError() + + cdef Py_ssize_t idx + cdef object value + cdef list acc = [] + + for idx, value in enumerate(value_expression): + if (idx % 2) == 0: + acc.append(value) + else: + acc.append(self._fast_expand_var(<str> value, counter + 1)) + + return "".join(acc) + + ################################################################# + # Resolution algorithm: slow path # + ################################################################# + + # _slow_expand_var() + # + # Slow, iterative path for variable expansion with full error reporting + # + # Args: + # name (str): Name of the variable to expand + # + # Returns: + # (str): The expanded value of variable + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + cdef str _slow_expand_var(self, str name): + cdef list value_expression + cdef str expanded + + value_expression = self._get_checked_value_expression(name, None, None) + if len(value_expression) > 1: + expanded = self._slow_expand_value_expression(name, value_expression, None) + value_expression = [sys.intern(expanded)] + self._values[name] = value_expression + + return <str> value_expression[0] + + # _slow_expand_value_expression() + # + # Slow, iterative path for value expression expansion with full error reporting + # + # Note that either `varname` or `node` must be provided, these are used to + # identify the provenance of this value expression (which might be the value + # of a variable, or a value expression found elswhere in project YAML which + # needs to be substituted). + # + # Args: + # varname (str|None): The variable name associated with this value expression, if any + # value_expression (list): The parsed value expression to be expanded + # node (ScalarNode|None): The ScalarNode who is asking for an expansion + # + # Returns: + # (str): The expanded value expression + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + cdef str _slow_expand_value_expression(self, str varname, list value_expression, ScalarNode node): + cdef ResolutionStep step + cdef ResolutionStep new_step + cdef ResolutionStep this_step + cdef list iter_value_expression + cdef Py_ssize_t idx = 0 + cdef object value + cdef str resolved_varname + cdef str resolved_value = None + + # We will collect the varnames and value expressions which need + # to be resolved in the loop, sorted by dependency, and then + # finally reverse through them resolving them one at a time + # + cdef list resolved_varnames = [] + cdef list resolved_values = [] + + step = ResolutionStep() + step.init(varname, value_expression, None) + while step: + # Keep a hold of the current overall step + this_step = step + step = step.prev + + # Check for circular dependencies + this_step.check_circular(self._original) + + for idx, value in enumerate(this_step.value_expression): + + # Skip literal parts of the value expression + if (idx % 2) == 0: continue - if var in visited: - raise LoadError("{}: ".format(self.original.get_scalar(var).get_provenance()) + - ("Variable '{}' expands to contain a reference to itself. " + - "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var), - LoadErrorReason.RECURSIVE_VARIABLE) - visited.append(var) - cycle_check(self._expstr_map[var], visited, cleared) - visited.pop() - cleared.add(var) - - cleared = set() - for key, expstr in self._expstr_map.items(): - if key not in cleared: - cycle_check(expstr, [key], cleared) - -# Cache for the parsed expansion strings. While this is nominally -# something which might "waste" memory, in reality each of these -# will live as long as the element which uses it, which is the -# vast majority of the memory usage across the execution of BuildStream. -cdef dict PARSE_CACHE = { - # Prime the cache with the empty string since otherwise that can - # cause issues with the parser, complications to which cause slowdown - "": [""], -} + iter_value_expression = self._get_checked_value_expression(<str> value, this_step.referee, node) + + # Queue up this value. + # + # Even if the value was already resolved, we need it in context to resolve + # previously enqueued variables + resolved_values.append(iter_value_expression) + resolved_varnames.append(value) + + # Queue up the values dependencies. + # + if len(iter_value_expression) > 1: + new_step = ResolutionStep() + new_step.init(<str> value, iter_value_expression, this_step) + + # Link it to the end of the stack + new_step.prev = step + step = new_step + + # We've now constructed the dependencies queue such that + # later dependencies are on the right, we can now safely peddle + # backwards and the last (leftmost) resolved value is the one + # we want to return. + # + for iter_value_expression, resolved_varname in zip(reversed(resolved_values), reversed(resolved_varnames)): -# Helper to parse a string into an expansion string tuple, caching -# the results so that future parse requests don't need to think about -# the string -cdef list _parse_expstr(str instr): - cdef list ret + # Resolve variable expressions as needed + # + if len(iter_value_expression) > 1: + resolved_value = self._resolve_value_expression(iter_value_expression) + iter_value_expression = [resolved_value] + if resolved_varname is not None: + self._values[resolved_varname] = iter_value_expression - try: - return <list> PARSE_CACHE[instr] - except KeyError: - # This use of the regex turns a string like "foo %{bar} baz" into - # a list ["foo ", "bar", " baz"] - splits = PARSE_EXPANSION.split(instr) - # If an expansion ends the string, we get an empty string on the end - # which we can optimise away, making the expansion routines not need - # a test for this. - if splits[-1] == '': - del splits [-1] - # Cache an interned copy of this. We intern it to try and reduce the - # memory impact of the cache. It seems odd to cache the list length - # but this is measurably cheaper than calculating it each time during - # string expansion. - ret = [sys.intern(<str> s) for s in <list> splits] - PARSE_CACHE[instr] = ret - return ret + return resolved_value + # _get_checked_value_expression() + # + # Fetches a value expression from the value table and raises a user + # facing error if the value is undefined. + # + # Args: + # varname (str): The variable name to fetch + # referee (str): The variable name referring to `varname`, or None + # node (ScalarNode): The ScalarNode for which we need to resolve `name` + # + # Returns: + # (list): The value expression for varname + # + # Raises: + # (LoadError): An appropriate error in case of undefined variables + # + cdef list _get_checked_value_expression(self, str varname, str referee, ScalarNode node): + cdef ProvenanceInformation provenance = None + cdef Node referee_node + cdef str error_message -# Helper to expand and cache a variable definition in the context of -# the given dictionary of expansion strings. -# -# Args: -# content (dict): Dictionary of expansion strings -# name (str): Name of the variable to expand -# counter (int): Recursion counter -# -# Returns: -# (str): The expanded value of variable + # + # Fetch the value and detect undefined references + # + try: + return <list> self._values[varname] + except KeyError as e: + + # Either the provenance is the toplevel calling provenance, + # or it is the provenance of the direct referee + referee_node = self._original.get_node(referee, allowed_types=None, allow_none=True) + if referee_node is not None: + provenance = referee_node.get_provenance() + elif node: + provenance = node.get_provenance() + + error_message = "Reference to undefined variable '{}'".format(varname) + if provenance: + error_message = "{}: {}".format(provenance, error_message) + raise LoadError(error_message, LoadErrorReason.UNRESOLVED_VARIABLE) from e + + # _resolve_value_expression() + # + # Resolves a value expression with the expectation that all + # variables within this value expression have already been + # resolved and updated in the Variables._values table. + # + # This is used as a part of the iterative resolution codepath, + # where value expressions are first sorted by dependency before + # being resolved in one go. + # + # Args: + # value_expression (list): The value expression to resolve + # + # Returns: + # (str): The resolved value expression + # + cdef str _resolve_value_expression(self, list value_expression): + cdef Py_ssize_t idx + cdef object value + cdef list acc = [] + + for idx, value in enumerate(value_expression): + if (idx % 2) == 0: + acc.append(value) + else: + acc.append(self._values[value][0]) + + return "".join(acc) + + +# ResolutionStep() # -# Raises: -# KeyError, if any expansion is missing -# RecursionError, if recursion required for evaluation is too deep +# The context for a single iteration in variable resolution. # -cdef str _expand_var(dict content, str name, int counter = 0): - cdef str sub +cdef class ResolutionStep: + cdef str referee + cdef list value_expression + cdef ResolutionStep parent + cdef ResolutionStep prev + + # init() + # + # Initialize this ResolutionStep + # + # Args: + # referee (str): The name of the referring variable + # value_expression (list): The parsed value expression to be expanded + # parent (ResolutionStep): The parent ResolutionStep + # + cdef init(self, str referee, list value_expression, ResolutionStep parent): + self.referee = referee + self.value_expression = value_expression + self.parent = parent + self.prev = None + + # check_circular() + # + # Check for circular references in this step. + # + # Args: + # original_values (MappingNode): The original MappingNode for the Variables + # + # Raises: + # (LoadError): Will raise a user facing LoadError with + # LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case + # circular references were encountered. + # + cdef check_circular(self, MappingNode original_values): + cdef ResolutionStep step = self.parent + while step: + if self.referee is step.referee: + self._raise_circular_reference_error(step, original_values) + step = step.parent + + # _raise_circular_reference_error() + # + # Helper function to construct a full report and raise the LoadError + # with LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE. + # + # Args: + # conflict (ResolutionStep): The resolution step which conflicts with this step + # original_values (MappingNode): The original node to extract provenances from + # + # Raises: + # (LoadError): Unconditionally + # + cdef _raise_circular_reference_error(self, ResolutionStep conflict, MappingNode original_values): + cdef list error_lines = [] + cdef ResolutionStep step = self + cdef ScalarNode node + cdef str referee + + while step is not conflict: + if step.parent: + referee = step.parent.referee + else: + referee = self.referee + + node = original_values.get_scalar(referee) - if len(content[name]) > 1: - sub = _expand_expstr(content, <list> content[name], counter) - content[name] = [sys.intern(sub)] + error_lines.append("{}: Variable '{}' refers to variable '{}'".format(node.get_provenance(), referee, step.referee)) + step = step.parent - return content[name][0] + raise LoadError("Circular dependency detected on variable '{}'".format(self.referee), + LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE, + detail="\n".join(reversed(error_lines))) -# Helper to expand a given top level expansion string tuple in the context -# of the given dictionary of expansion strings. +# _parse_value_expression() +# +# Tries to fetch the parsed value expression from the cache, parsing and +# caching value expressions on demand and returns the parsed value expression. # # Args: -# content (dict): Dictionary of expansion strings -# name (str): Name of the variable to expand -# counter (int): Recursion counter +# value_expression (str): The value expression in string form to parse # # Returns: -# (str): The expanded value of variable -# -# Raises: -# KeyError, if any expansion is missing -# RecursionError, if recursion required for evaluation is too deep +# (list): The parsed value expression in list form. # -cdef str _expand_expstr(dict content, list value, int counter = 0): - if counter > 1000: - raise RecursionError() +cdef list _parse_value_expression(str value_expression): + cdef list ret - cdef Py_ssize_t idx = 0 - cdef Py_ssize_t value_len = len(value) - cdef str sub - cdef list acc = [] + try: + return <list> VALUE_EXPRESSION_CACHE[value_expression] + except KeyError: + # This use of the regex turns a string like "foo %{bar} baz" into + # a list ["foo ", "bar", " baz"] + # + # The result is a parsed value expression, where even indicies + # contain literal parts of the value and odd indices contain + # variable names which need to be replaced by resolved variables. + # + splits = VALUE_EXPRESSION_REGEX.split(value_expression) - while idx < value_len: - acc.append(value[idx]) - idx += 1 + # Optimize later routines by discarding any unnecessary trailing + # empty strings. + # + if splits[-1] == '': + del splits[-1] - if idx < value_len: - acc.append(_expand_var(content, <str> value[idx], counter + 1)) - idx += 1 + # We intern the string parts to try and reduce the memory impact + # of the cache. + # + ret = [sys.intern(<str> s) for s in <list> splits] - return "".join(acc) + # Cache and return the value expression + # + VALUE_EXPRESSION_CACHE[value_expression] = ret + return ret # Iterator for all flatten variables. # Used by Variables.__iter__ cdef class _VariablesIterator: - cdef dict _expstr_map + cdef Variables _variables cdef object _iter - def __init__(self, dict expstr_map): - self._expstr_map = expstr_map - self._iter = iter(expstr_map) + def __init__(self, Variables variables): + self._variables = variables + self._iter = iter(variables._values) def __iter__(self): return self def __next__(self): name = next(self._iter) - return name, _expand_var(self._expstr_map, name) + return name, self._variables._expand_var(name) diff --git a/src/buildstream/element.py b/src/buildstream/element.py index 45c3735fd..547397e54 100644 --- a/src/buildstream/element.py +++ b/src/buildstream/element.py @@ -285,6 +285,8 @@ class Element(Plugin): variables = self.__extract_variables(project, meta) variables["element-name"] = self.name self.__variables = Variables(variables) + if not meta.is_junction: + self.__variables.check() # Collect the composited environment now that we have variables unexpanded_env = self.__extract_environment(project, meta) @@ -2815,8 +2817,8 @@ class Element(Plugin): # If this ever changes, things will go wrong unexpectedly. if not self.__whitelist_regex: bstdata = self.get_public_data("bst") - whitelist = bstdata.get_str_list("overlap-whitelist", default=[]) - whitelist_expressions = [utils._glob2re(self.__variables.subst(exp.strip())) for exp in whitelist] + whitelist = bstdata.get_sequence("overlap-whitelist", default=[]) + whitelist_expressions = [utils._glob2re(self.__variables.subst(node)) for node in whitelist] expression = "^(?:" + "|".join(whitelist_expressions) + ")$" self.__whitelist_regex = re.compile(expression) return self.__whitelist_regex.match(os.path.join(os.sep, path)) diff --git a/src/buildstream/exceptions.py b/src/buildstream/exceptions.py index e77d64fe7..caf08ae57 100644 --- a/src/buildstream/exceptions.py +++ b/src/buildstream/exceptions.py @@ -131,8 +131,8 @@ class LoadErrorReason(Enum): RECURSIVE_INCLUDE = 21 """A recursive include has been encountered""" - RECURSIVE_VARIABLE = 22 - """A recursive variable has been encountered""" + CIRCULAR_REFERENCE_VARIABLE = 22 + """A circular variable reference was detected""" PROTECTED_VARIABLE_REDEFINED = 23 """An attempt was made to set the value of a protected variable""" diff --git a/tests/format/variables.py b/tests/format/variables.py index c5e8eebad..2f436fc75 100644 --- a/tests/format/variables.py +++ b/tests/format/variables.py @@ -53,30 +53,95 @@ def test_overrides(cli, datafiles, target, varname, expected): assert result_vars.get_str(varname) == expected -@pytest.mark.parametrize("element", ["manual.bst", "manual2.bst"]) +@pytest.mark.parametrize( + "element,provenance", + [ + # This test makes a reference to an undefined variable in a build command + ("manual.bst", "manual.bst [line 5 column 6]"), + # This test makes a reference to an undefined variable by another variable, + # ensuring that we validate variables even when they are unused + ("manual2.bst", "manual2.bst [line 4 column 8]"), + # This test uses a build command to refer to some variables which ultimately + # refer to an undefined variable, testing a more complex case. + ("manual3.bst", "manual3.bst [line 6 column 8]"), + ], + ids=["build-command", "variables", "complex"], +) @pytest.mark.datafiles(os.path.join(DATA_DIR, "missing_variables")) -def test_missing_variable(cli, datafiles, element): +def test_undefined(cli, datafiles, element, provenance): project = str(datafiles) result = cli.run(project=project, silent=True, args=["show", "--deps", "none", "--format", "%{config}", element]) result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE) + assert provenance in result.stderr +@pytest.mark.parametrize( + "element,provenances", + [ + # Test a simple a -> b and b -> a reference + ("simple-cyclic.bst", ["simple-cyclic.bst [line 4 column 5]", "simple-cyclic.bst [line 5 column 5]"]), + # Test a simple a -> b and b -> a reference with some text involved + ("cyclic.bst", ["cyclic.bst [line 5 column 10]", "cyclic.bst [line 4 column 5]"]), + # Test an indirect circular dependency + ( + "indirect-cyclic.bst", + [ + "indirect-cyclic.bst [line 5 column 5]", + "indirect-cyclic.bst [line 6 column 5]", + "indirect-cyclic.bst [line 7 column 5]", + "indirect-cyclic.bst [line 8 column 5]", + ], + ), + # Test an indirect circular dependency + ("self-reference.bst", ["self-reference.bst [line 4 column 5]"]), + ], + ids=["simple", "simple-text", "indirect", "self-reference"], +) @pytest.mark.timeout(15, method="signal") @pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables")) -def test_simple_cyclic_variables(cli, datafiles): - print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence") +def test_circular_reference(cli, datafiles, element, provenances): + print_warning("Performing cyclic test, if this test times out it will exit the test sequence") project = str(datafiles) - result = cli.run(project=project, silent=True, args=["build", "simple-cyclic.bst"]) - result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE) + result = cli.run(project=project, silent=True, args=["build", element]) + result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE) + for provenance in provenances: + assert provenance in result.stderr -@pytest.mark.timeout(15, method="signal") -@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables")) -def test_cyclic_variables(cli, datafiles): - print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence") +# Test that variables which refer to eachother very deeply are +# still resolved correctly, this ensures that we are not relying +# on a recursive algorithm limited by stack depth. +# +@pytest.mark.parametrize( + "maxvars", [50, 500, 5000], +) +@pytest.mark.datafiles(os.path.join(DATA_DIR, "defaults")) +def test_deep_references(cli, datafiles, maxvars): project = str(datafiles) - result = cli.run(project=project, silent=True, args=["build", "cyclic.bst"]) - result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE) + + # Generate an element with very, very many variables to resolve, + # each which expand to the value of the previous variable. + # + # The bottom variable defines a test value which we check for + # in the top variable in `bst show` output. + # + topvar = "var{}".format(maxvars) + bottomvar = "var0" + testvalue = "testvalue {}".format(maxvars) + + # Generate + variables = {"var{}".format(idx + 1): "%{var" + str(idx) + "}" for idx in range(maxvars)} + variables[bottomvar] = testvalue + element = {"kind": "manual", "variables": variables} + _yaml.roundtrip_dump(element, os.path.join(project, "test.bst")) + + # Run `bst show` + result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"]) + result.assert_success() + + # Test results + result_vars = _yaml.load_data(result.output) + assert result_vars.get_str(topvar) == testvalue @pytest.mark.parametrize("protected_var", PROTECTED_VARIABLES) @@ -168,3 +233,13 @@ def test_variables_resolving_errors_in_public_section(cli, datafiles): result = cli.run(project=project, args=["show", "--format", "%{public}", "public_unresolved.bst"]) result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE) + + +@pytest.mark.datafiles(os.path.join(DATA_DIR, "partial_context")) +def test_partial_context_junctions(cli, datafiles): + project = str(datafiles) + + result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"]) + result.assert_success() + result_vars = _yaml.load_data(result.output) + assert result_vars.get_str("eltvar") == "/bar/foo/baz" diff --git a/tests/format/variables/cyclic_variables/cyclic.bst b/tests/format/variables/cyclic_variables/cyclic.bst index a05a40b27..38832fa86 100644 --- a/tests/format/variables/cyclic_variables/cyclic.bst +++ b/tests/format/variables/cyclic_variables/cyclic.bst @@ -2,4 +2,4 @@ kind: manual variables: a: "%{prefix}/a" - prefix: "%{a}/some_prefix/"
\ No newline at end of file + prefix: "%{a}/some_prefix/" diff --git a/tests/format/variables/cyclic_variables/indirect-cyclic.bst b/tests/format/variables/cyclic_variables/indirect-cyclic.bst new file mode 100644 index 000000000..fb06fb008 --- /dev/null +++ b/tests/format/variables/cyclic_variables/indirect-cyclic.bst @@ -0,0 +1,8 @@ +kind: manual + +variables: + foo: "%{a}" + a: "%{b}" + b: "%{c}" + c: "%{d}" + d: "%{a}" diff --git a/tests/format/variables/cyclic_variables/self-reference.bst b/tests/format/variables/cyclic_variables/self-reference.bst new file mode 100644 index 000000000..2e9829d03 --- /dev/null +++ b/tests/format/variables/cyclic_variables/self-reference.bst @@ -0,0 +1,4 @@ +kind: manual + +variables: + a: "Referencing itself with %{a}" diff --git a/tests/format/variables/missing_variables/manual3.bst b/tests/format/variables/missing_variables/manual3.bst new file mode 100644 index 000000000..496864890 --- /dev/null +++ b/tests/format/variables/missing_variables/manual3.bst @@ -0,0 +1,10 @@ +kind: manual + +variables: + hello: "Hello mister %{pony}" + greeting: "The %{hello} string twice: %{hello} again" + pony: "The pony is %{undefined}" + +config: + build-commands: + - Some indirectly undefined variable %{greeting} diff --git a/tests/format/variables/partial_context/base.bst b/tests/format/variables/partial_context/base.bst new file mode 100644 index 000000000..10ce559a9 --- /dev/null +++ b/tests/format/variables/partial_context/base.bst @@ -0,0 +1,4 @@ +kind: junction +sources: +- kind: local + path: base diff --git a/tests/format/variables/partial_context/base/project.conf b/tests/format/variables/partial_context/base/project.conf new file mode 100644 index 000000000..91511bfe9 --- /dev/null +++ b/tests/format/variables/partial_context/base/project.conf @@ -0,0 +1,3 @@ +name: base +min-version: 2.0 + diff --git a/tests/format/variables/partial_context/base/vars.yml b/tests/format/variables/partial_context/base/vars.yml new file mode 100644 index 000000000..3a91e1310 --- /dev/null +++ b/tests/format/variables/partial_context/base/vars.yml @@ -0,0 +1,2 @@ +variables: + subvar: "/bar" diff --git a/tests/format/variables/partial_context/project.conf b/tests/format/variables/partial_context/project.conf new file mode 100644 index 000000000..e8a8ed038 --- /dev/null +++ b/tests/format/variables/partial_context/project.conf @@ -0,0 +1,8 @@ +name: test +min-version: 2.0 + +(@): base.bst:vars.yml + +variables: + var: "%{subvar}/foo" + diff --git a/tests/format/variables/partial_context/test.bst b/tests/format/variables/partial_context/test.bst new file mode 100644 index 000000000..8276763c3 --- /dev/null +++ b/tests/format/variables/partial_context/test.bst @@ -0,0 +1,3 @@ +kind: manual +variables: + eltvar: "%{var}/baz" diff --git a/tests/frontend/overlaps.py b/tests/frontend/overlaps.py index a45fc700d..28bf8a7f5 100644 --- a/tests/frontend/overlaps.py +++ b/tests/frontend/overlaps.py @@ -4,7 +4,7 @@ import os import pytest from buildstream.testing.runcli import cli # pylint: disable=unused-import -from buildstream.exceptions import ErrorDomain +from buildstream.exceptions import ErrorDomain, LoadErrorReason from buildstream import _yaml from buildstream.plugin import CoreWarnings from tests.testutils import generate_junction @@ -71,6 +71,19 @@ def test_overlaps_whitelist_on_overlapper(cli, datafiles): @pytest.mark.datafiles(DATA_DIR) +def test_overlaps_whitelist_undefined_variable(cli, datafiles): + project_dir = str(datafiles) + gen_project(project_dir, False) + result = cli.run(project=project_dir, silent=True, args=["build", "whitelist-undefined.bst"]) + + # Assert that we get the expected undefined variable error, + # and that it has the provenance we expect from whitelist-undefined.bst + # + result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE) + assert "whitelist-undefined.bst [line 13 column 6]" in result.stderr + + +@pytest.mark.datafiles(DATA_DIR) @pytest.mark.parametrize("use_fatal_warnings", [True, False]) def test_overlaps_script(cli, datafiles, use_fatal_warnings): # Test overlaps with script element to test diff --git a/tests/frontend/overlaps/whitelist-undefined.bst b/tests/frontend/overlaps/whitelist-undefined.bst new file mode 100644 index 000000000..5cb0c645e --- /dev/null +++ b/tests/frontend/overlaps/whitelist-undefined.bst @@ -0,0 +1,13 @@ +kind: import +config: + source: / + target: / +depends: +- b-whitelisted.bst +sources: +- kind: local + path: "a" +public: + bst: + overlap-whitelist: + - "%{undefined-variable}/*" |