diff options
author | Daniel Silverstone <daniel.silverstone@codethink.co.uk> | 2019-02-14 10:50:08 +0000 |
---|---|---|
committer | Daniel Silverstone <daniel.silverstone@codethink.co.uk> | 2019-02-15 10:03:46 +0000 |
commit | 0f78a47d60f49d7590d54093d6c19eb066eee813 (patch) | |
tree | bc46b3ec23c74913c22505d2cb739dec43e6994d /buildstream | |
parent | 51dae74715c2934d8770dc92c60925f32295a4e0 (diff) | |
download | buildstream-0f78a47d60f49d7590d54093d6c19eb066eee813.tar.gz |
Variables: Rework how expansion strings work
Rather than constantly using regular expressions and retrieval from YAML
nodes, pre-parse expansion strings into a list representation cached
for reuse, and then expand them as simple string concatenation.
Signed-off-by: Daniel Silverstone <daniel.silverstone@codethink.co.uk>
Diffstat (limited to 'buildstream')
-rw-r--r-- | buildstream/_frontend/widget.py | 2 | ||||
-rw-r--r-- | buildstream/_variables.py | 308 | ||||
-rw-r--r-- | buildstream/element.py | 5 |
3 files changed, 182 insertions, 133 deletions
diff --git a/buildstream/_frontend/widget.py b/buildstream/_frontend/widget.py index b8e3d920a..2920d657d 100644 --- a/buildstream/_frontend/widget.py +++ b/buildstream/_frontend/widget.py @@ -398,7 +398,7 @@ class LogLine(Widget): # Variables if "%{vars" in format_: - variables = _yaml.node_sanitize(element._Element__variables.variables) + variables = _yaml.node_sanitize(element._Element__variables.flat) line = p.fmt_subst( line, 'vars', yaml.round_trip_dump(variables, default_flow_style=False, allow_unicode=True)) diff --git a/buildstream/_variables.py b/buildstream/_variables.py index 95b80cc08..436b80962 100644 --- a/buildstream/_variables.py +++ b/buildstream/_variables.py @@ -1,5 +1,6 @@ # # Copyright (C) 2016 Codethink Limited +# Copyright (C) 2019 Bloomberg L.P. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -16,15 +17,37 @@ # # Authors: # Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> +# Daniel Silverstone <daniel.silverstone@codethink.co.uk> import re +import sys from ._exceptions import LoadError, LoadErrorReason from . import _yaml # Variables are allowed to have dashes here # -_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}' +PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}") + + +# Throughout this code you will see variables named things like `expstr`. +# These hold data structures called "expansion strings" and are the parsed +# form of the strings which are the input to this subsystem. Strings +# such as "Hello %{name}, how are you?" are parsed into the form: +# (3, ["Hello ", "name", ", how are you?"]) +# i.e. a tuple of an integer and a list, where the integer is the cached +# length of the list, and the list consists of one or more strings. +# Strings in even indices of the list (0, 2, 4, etc) are constants which +# are copied into the output of the expansion algorithm. Strings in the +# odd indices (1, 3, 5, etc) are the names of further expansions to make. +# In the example above, first "Hello " is copied, then "name" is expanded +# and so must be another named expansion string passed in to the constructor +# of the Variables class, and whatever is yielded from the expansion of "name" +# is added to the concatenation for the result. Finally ", how are you?" is +# copied in and the whole lot concatenated for return. +# +# To see how strings are parsed, see `_parse_expstr()` after the class, and +# to see how expansion strings are expanded, see `_expand_expstr()` after that. # The Variables helper object will resolve the variable references in @@ -38,14 +61,15 @@ _VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}' # node (dict): A node loaded and composited with yaml tools # # Raises: -# LoadError, if unresolved variables occur. +# LoadError, if unresolved variables, or cycles in resolution, occur. # class Variables(): def __init__(self, node): self.original = node - self.variables = self._resolve(node) + self._expstr_map = self._resolve(node) + self.flat = self._flatten() # subst(): # @@ -61,139 +85,167 @@ class Variables(): # LoadError, if the string contains unresolved variable references. # def subst(self, string): - substitute, unmatched, _ = self._subst(string, self.variables) - unmatched = list(set(unmatched)) - if unmatched: - if len(unmatched) == 1: - message = "Unresolved variable '{var}'".format(var=unmatched[0]) - else: - message = "Unresolved variables: " - for unmatch in unmatched: - if unmatched.index(unmatch) > 0: - message += ', ' - message += unmatch - - raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message) - - return substitute - - def _subst(self, string, variables): - - def subst_callback(match): - nonlocal variables - nonlocal unmatched - nonlocal matched - - token = match.group(0) - varname = match.group(1) - - value = _yaml.node_get(variables, str, varname, default_value=None) - if value is not None: - # We have to check if the inner string has variables - # and return unmatches for those - unmatched += re.findall(_VARIABLE_MATCH, value) - matched += [varname] - else: - # Return unmodified token - unmatched += [varname] - value = token - - return value - - matched = [] - unmatched = [] - replacement = re.sub(_VARIABLE_MATCH, subst_callback, string) - - return (replacement, unmatched, matched) + expstr = _parse_expstr(string) + + try: + return _expand_expstr(self._expstr_map, expstr) + except KeyError: + unmatched = [] + + # Look for any unmatched variable names in the expansion string + for var in expstr[1][1::2]: + if var not in self._expstr_map: + unmatched.append(var) + + if unmatched: + message = "Unresolved variable{}: {}".format( + "s" if len(unmatched) > 1 else "", + ", ".join(unmatched) + ) + + raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message) + # Otherwise, re-raise the KeyError since it clearly came from some + # other unknowable cause. + raise # Variable resolving code # - # Here we substitute variables for values (resolve variables) repeatedly - # in a dictionary, each time creating a new dictionary until there is no - # more unresolved variables to resolve, or, until resolving further no - # longer resolves anything, in which case we throw an exception. + # Here we resolve all of our inputs into a dictionary, ready for use + # in subst() def _resolve(self, node): - variables = node - # Special case, if notparallel is specified in the variables for this # element, then override max-jobs to be 1. # Initialize it as a string as all variables are processed as strings. # - if _yaml.node_get(variables, bool, 'notparallel', default_value=False): - variables['max-jobs'] = str(1) - - # Resolve the dictionary once, reporting the new dictionary with things - # substituted in it, and reporting unmatched tokens. - # - def resolve_one(variables): - unmatched = [] - resolved = {} - - for key, value in _yaml.node_items(variables): - - # Ensure stringness of the value before substitution - value = _yaml.node_get(variables, str, key) - - resolved_var, item_unmatched, matched = self._subst(value, variables) - - if _wrap_variable(key) in resolved_var: - referenced_through = find_recursive_variable(key, matched, variables) + if _yaml.node_get(node, bool, 'notparallel', default_value=False): + node['max-jobs'] = str(1) + + ret = {} + for key, value in _yaml.node_items(node): + value = _yaml.node_get(node, str, key) + ret[sys.intern(key)] = _parse_expstr(value) + return ret + + def _check_for_missing(self): + # First the check for anything unresolvable + summary = [] + for key, expstr in self._expstr_map.items(): + for var in expstr[1][1::2]: + if var not in self._expstr_map: + line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}" + provenance = _yaml.node_get_provenance(self.original, key) + summary.append(line.format(unmatched=var, variable=key, provenance=provenance)) + if summary: + raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, + "Failed to resolve one or more variable:\n{}\n".format("\n".join(summary))) + + def _check_for_cycles(self): + # And now the cycle checks + def cycle_check(expstr, visited, cleared): + for var in expstr[1][1::2]: + if var in cleared: + continue + if var in visited: raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE, - "{}: ".format(_yaml.node_get_provenance(variables, key)) + + "{}: ".format(_yaml.node_get_provenance(self.original, var)) + ("Variable '{}' expands to contain a reference to itself. " + - "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key))) - - resolved[key] = resolved_var - unmatched += item_unmatched - - # Carry over provenance - resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY] - return (resolved, unmatched) - - # Resolve it until it's resolved or broken - # - resolved = variables - unmatched = ['dummy'] - last_unmatched = ['dummy'] - while unmatched: - resolved, unmatched = resolve_one(resolved) - - # Lists of strings can be compared like this - if unmatched == last_unmatched: - # We've got the same result twice without matching everything, - # something is undeclared or cyclic, compose a summary. - # - summary = '' - for unmatch in set(unmatched): - for var, provenance in self._find_references(unmatch): - line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n" - summary += line.format(unmatched=unmatch, variable=var, provenance=provenance) - - raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, - "Failed to resolve one or more variable:\n{}".format(summary)) - - last_unmatched = unmatched - - return resolved - - # Helper function to fetch information about the node referring to a variable + "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var)) + visited.append(var) + cycle_check(self._expstr_map[var], visited, cleared) + visited.pop() + cleared.add(var) + + cleared = set() + for key, expstr in self._expstr_map.items(): + if key not in cleared: + cycle_check(expstr, [key], cleared) + + # _flatten(): # - def _find_references(self, varname): - fullname = _wrap_variable(varname) - for key, value in _yaml.node_items(self.original): - if fullname in value: - provenance = _yaml.node_get_provenance(self.original, key) - yield (key, provenance) - - -def find_recursive_variable(variable, matched_variables, all_vars): - matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables) - for key, value in zip(matched_variables, matched_values): - if _wrap_variable(variable) in value: - return key - # We failed to find a recursive variable - return None - - -def _wrap_variable(var): - return "%{" + var + "}" + # Turn our dictionary of expansion strings into a flattened dict + # so that we can run expansions faster in the future + # + # Raises: + # LoadError, if the string contains unresolved variable references or + # if cycles are detected in the variable references + # + def _flatten(self): + flat = {} + try: + for key, expstr in self._expstr_map.items(): + if expstr[0] > 1: + expstr = (1, [sys.intern(_expand_expstr(self._expstr_map, expstr))]) + self._expstr_map[key] = expstr + flat[key] = expstr[1][0] + except KeyError: + self._check_for_missing() + raise + except RecursionError: + self._check_for_cycles() + raise + return flat + + +# Cache for the parsed expansion strings. While this is nominally +# something which might "waste" memory, in reality each of these +# will live as long as the element which uses it, which is the +# vast majority of the memory usage across the execution of BuildStream. +PARSE_CACHE = { + # Prime the cache with the empty string since otherwise that can + # cause issues with the parser, complications to which cause slowdown + "": (1, [""]), +} + + +# Helper to parse a string into an expansion string tuple, caching +# the results so that future parse requests don't need to think about +# the string +def _parse_expstr(instr): + try: + return PARSE_CACHE[instr] + except KeyError: + # This use of the regex turns a string like "foo %{bar} baz" into + # a list ["foo ", "bar", " baz"] + splits = PARSE_EXPANSION.split(instr) + # If an expansion ends the string, we get an empty string on the end + # which we can optimise away, making the expansion routines not need + # a test for this. + if splits[-1] == '': + splits = splits[:-1] + # Cache an interned copy of this. We intern it to try and reduce the + # memory impact of the cache. It seems odd to cache the list length + # but this is measurably cheaper than calculating it each time during + # string expansion. + PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits]) + return PARSE_CACHE[instr] + + +# Helper to expand a given top level expansion string tuple in the context +# of the given dictionary of expansion strings. +# +# Note: Will raise KeyError if any expansion is missing +def _expand_expstr(content, topvalue): + # Short-circuit constant strings + if topvalue[0] == 1: + return topvalue[1][0] + + # Short-circuit strings which are entirely an expansion of another variable + # e.g. "%{another}" + if topvalue[0] == 2 and topvalue[1][0] == "": + return _expand_expstr(content, content[topvalue[1][1]]) + + # Otherwise process fully... + def internal_expand(value): + (expansion_len, expansion_bits) = value + idx = 0 + while idx < expansion_len: + # First yield any constant string content + yield expansion_bits[idx] + idx += 1 + # Now, if there is an expansion variable left to expand, yield + # the expansion of that variable too + if idx < expansion_len: + yield from internal_expand(content[expansion_bits[idx]]) + idx += 1 + + return "".join(internal_expand(topvalue)) diff --git a/buildstream/element.py b/buildstream/element.py index 5ecc25ab2..b3f4d5518 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -894,10 +894,7 @@ class Element(Plugin): (str): The resolved value for *varname*, or None if no variable was declared with the given name. """ - if varname in self.__variables.variables: - return self.__variables.variables[varname] - - return None + return self.__variables.flat.get(varname) def batch_prepare_assemble(self, flags, *, collect=None): """ Configure command batching across prepare() and assemble() |