summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Silverstone <daniel.silverstone@codethink.co.uk>2019-02-14 10:50:08 +0000
committerDaniel Silverstone <daniel.silverstone@codethink.co.uk>2019-02-15 10:03:46 +0000
commit0f78a47d60f49d7590d54093d6c19eb066eee813 (patch)
treebc46b3ec23c74913c22505d2cb739dec43e6994d
parent51dae74715c2934d8770dc92c60925f32295a4e0 (diff)
downloadbuildstream-0f78a47d60f49d7590d54093d6c19eb066eee813.tar.gz
Variables: Rework how expansion strings work
Rather than constantly using regular expressions and retrieval from YAML nodes, pre-parse expansion strings into a list representation cached for reuse, and then expand them as simple string concatenation. Signed-off-by: Daniel Silverstone <daniel.silverstone@codethink.co.uk>
-rw-r--r--buildstream/_frontend/widget.py2
-rw-r--r--buildstream/_variables.py308
-rw-r--r--buildstream/element.py5
3 files changed, 182 insertions, 133 deletions
diff --git a/buildstream/_frontend/widget.py b/buildstream/_frontend/widget.py
index b8e3d920a..2920d657d 100644
--- a/buildstream/_frontend/widget.py
+++ b/buildstream/_frontend/widget.py
@@ -398,7 +398,7 @@ class LogLine(Widget):
# Variables
if "%{vars" in format_:
- variables = _yaml.node_sanitize(element._Element__variables.variables)
+ variables = _yaml.node_sanitize(element._Element__variables.flat)
line = p.fmt_subst(
line, 'vars',
yaml.round_trip_dump(variables, default_flow_style=False, allow_unicode=True))
diff --git a/buildstream/_variables.py b/buildstream/_variables.py
index 95b80cc08..436b80962 100644
--- a/buildstream/_variables.py
+++ b/buildstream/_variables.py
@@ -1,5 +1,6 @@
#
# Copyright (C) 2016 Codethink Limited
+# Copyright (C) 2019 Bloomberg L.P.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -16,15 +17,37 @@
#
# Authors:
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
+# Daniel Silverstone <daniel.silverstone@codethink.co.uk>
import re
+import sys
from ._exceptions import LoadError, LoadErrorReason
from . import _yaml
# Variables are allowed to have dashes here
#
-_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
+PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
+
+
+# Throughout this code you will see variables named things like `expstr`.
+# These hold data structures called "expansion strings" and are the parsed
+# form of the strings which are the input to this subsystem. Strings
+# such as "Hello %{name}, how are you?" are parsed into the form:
+# (3, ["Hello ", "name", ", how are you?"])
+# i.e. a tuple of an integer and a list, where the integer is the cached
+# length of the list, and the list consists of one or more strings.
+# Strings in even indices of the list (0, 2, 4, etc) are constants which
+# are copied into the output of the expansion algorithm. Strings in the
+# odd indices (1, 3, 5, etc) are the names of further expansions to make.
+# In the example above, first "Hello " is copied, then "name" is expanded
+# and so must be another named expansion string passed in to the constructor
+# of the Variables class, and whatever is yielded from the expansion of "name"
+# is added to the concatenation for the result. Finally ", how are you?" is
+# copied in and the whole lot concatenated for return.
+#
+# To see how strings are parsed, see `_parse_expstr()` after the class, and
+# to see how expansion strings are expanded, see `_expand_expstr()` after that.
# The Variables helper object will resolve the variable references in
@@ -38,14 +61,15 @@ _VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
# node (dict): A node loaded and composited with yaml tools
#
# Raises:
-# LoadError, if unresolved variables occur.
+# LoadError, if unresolved variables, or cycles in resolution, occur.
#
class Variables():
def __init__(self, node):
self.original = node
- self.variables = self._resolve(node)
+ self._expstr_map = self._resolve(node)
+ self.flat = self._flatten()
# subst():
#
@@ -61,139 +85,167 @@ class Variables():
# LoadError, if the string contains unresolved variable references.
#
def subst(self, string):
- substitute, unmatched, _ = self._subst(string, self.variables)
- unmatched = list(set(unmatched))
- if unmatched:
- if len(unmatched) == 1:
- message = "Unresolved variable '{var}'".format(var=unmatched[0])
- else:
- message = "Unresolved variables: "
- for unmatch in unmatched:
- if unmatched.index(unmatch) > 0:
- message += ', '
- message += unmatch
-
- raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
-
- return substitute
-
- def _subst(self, string, variables):
-
- def subst_callback(match):
- nonlocal variables
- nonlocal unmatched
- nonlocal matched
-
- token = match.group(0)
- varname = match.group(1)
-
- value = _yaml.node_get(variables, str, varname, default_value=None)
- if value is not None:
- # We have to check if the inner string has variables
- # and return unmatches for those
- unmatched += re.findall(_VARIABLE_MATCH, value)
- matched += [varname]
- else:
- # Return unmodified token
- unmatched += [varname]
- value = token
-
- return value
-
- matched = []
- unmatched = []
- replacement = re.sub(_VARIABLE_MATCH, subst_callback, string)
-
- return (replacement, unmatched, matched)
+ expstr = _parse_expstr(string)
+
+ try:
+ return _expand_expstr(self._expstr_map, expstr)
+ except KeyError:
+ unmatched = []
+
+ # Look for any unmatched variable names in the expansion string
+ for var in expstr[1][1::2]:
+ if var not in self._expstr_map:
+ unmatched.append(var)
+
+ if unmatched:
+ message = "Unresolved variable{}: {}".format(
+ "s" if len(unmatched) > 1 else "",
+ ", ".join(unmatched)
+ )
+
+ raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
+ # Otherwise, re-raise the KeyError since it clearly came from some
+ # other unknowable cause.
+ raise
# Variable resolving code
#
- # Here we substitute variables for values (resolve variables) repeatedly
- # in a dictionary, each time creating a new dictionary until there is no
- # more unresolved variables to resolve, or, until resolving further no
- # longer resolves anything, in which case we throw an exception.
+ # Here we resolve all of our inputs into a dictionary, ready for use
+ # in subst()
def _resolve(self, node):
- variables = node
-
# Special case, if notparallel is specified in the variables for this
# element, then override max-jobs to be 1.
# Initialize it as a string as all variables are processed as strings.
#
- if _yaml.node_get(variables, bool, 'notparallel', default_value=False):
- variables['max-jobs'] = str(1)
-
- # Resolve the dictionary once, reporting the new dictionary with things
- # substituted in it, and reporting unmatched tokens.
- #
- def resolve_one(variables):
- unmatched = []
- resolved = {}
-
- for key, value in _yaml.node_items(variables):
-
- # Ensure stringness of the value before substitution
- value = _yaml.node_get(variables, str, key)
-
- resolved_var, item_unmatched, matched = self._subst(value, variables)
-
- if _wrap_variable(key) in resolved_var:
- referenced_through = find_recursive_variable(key, matched, variables)
+ if _yaml.node_get(node, bool, 'notparallel', default_value=False):
+ node['max-jobs'] = str(1)
+
+ ret = {}
+ for key, value in _yaml.node_items(node):
+ value = _yaml.node_get(node, str, key)
+ ret[sys.intern(key)] = _parse_expstr(value)
+ return ret
+
+ def _check_for_missing(self):
+ # First the check for anything unresolvable
+ summary = []
+ for key, expstr in self._expstr_map.items():
+ for var in expstr[1][1::2]:
+ if var not in self._expstr_map:
+ line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
+ provenance = _yaml.node_get_provenance(self.original, key)
+ summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
+ if summary:
+ raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
+ "Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)))
+
+ def _check_for_cycles(self):
+ # And now the cycle checks
+ def cycle_check(expstr, visited, cleared):
+ for var in expstr[1][1::2]:
+ if var in cleared:
+ continue
+ if var in visited:
raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE,
- "{}: ".format(_yaml.node_get_provenance(variables, key)) +
+ "{}: ".format(_yaml.node_get_provenance(self.original, var)) +
("Variable '{}' expands to contain a reference to itself. " +
- "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key)))
-
- resolved[key] = resolved_var
- unmatched += item_unmatched
-
- # Carry over provenance
- resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY]
- return (resolved, unmatched)
-
- # Resolve it until it's resolved or broken
- #
- resolved = variables
- unmatched = ['dummy']
- last_unmatched = ['dummy']
- while unmatched:
- resolved, unmatched = resolve_one(resolved)
-
- # Lists of strings can be compared like this
- if unmatched == last_unmatched:
- # We've got the same result twice without matching everything,
- # something is undeclared or cyclic, compose a summary.
- #
- summary = ''
- for unmatch in set(unmatched):
- for var, provenance in self._find_references(unmatch):
- line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n"
- summary += line.format(unmatched=unmatch, variable=var, provenance=provenance)
-
- raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
- "Failed to resolve one or more variable:\n{}".format(summary))
-
- last_unmatched = unmatched
-
- return resolved
-
- # Helper function to fetch information about the node referring to a variable
+ "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var))
+ visited.append(var)
+ cycle_check(self._expstr_map[var], visited, cleared)
+ visited.pop()
+ cleared.add(var)
+
+ cleared = set()
+ for key, expstr in self._expstr_map.items():
+ if key not in cleared:
+ cycle_check(expstr, [key], cleared)
+
+ # _flatten():
#
- def _find_references(self, varname):
- fullname = _wrap_variable(varname)
- for key, value in _yaml.node_items(self.original):
- if fullname in value:
- provenance = _yaml.node_get_provenance(self.original, key)
- yield (key, provenance)
-
-
-def find_recursive_variable(variable, matched_variables, all_vars):
- matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables)
- for key, value in zip(matched_variables, matched_values):
- if _wrap_variable(variable) in value:
- return key
- # We failed to find a recursive variable
- return None
-
-
-def _wrap_variable(var):
- return "%{" + var + "}"
+ # Turn our dictionary of expansion strings into a flattened dict
+ # so that we can run expansions faster in the future
+ #
+ # Raises:
+ # LoadError, if the string contains unresolved variable references or
+ # if cycles are detected in the variable references
+ #
+ def _flatten(self):
+ flat = {}
+ try:
+ for key, expstr in self._expstr_map.items():
+ if expstr[0] > 1:
+ expstr = (1, [sys.intern(_expand_expstr(self._expstr_map, expstr))])
+ self._expstr_map[key] = expstr
+ flat[key] = expstr[1][0]
+ except KeyError:
+ self._check_for_missing()
+ raise
+ except RecursionError:
+ self._check_for_cycles()
+ raise
+ return flat
+
+
+# Cache for the parsed expansion strings. While this is nominally
+# something which might "waste" memory, in reality each of these
+# will live as long as the element which uses it, which is the
+# vast majority of the memory usage across the execution of BuildStream.
+PARSE_CACHE = {
+ # Prime the cache with the empty string since otherwise that can
+ # cause issues with the parser, complications to which cause slowdown
+ "": (1, [""]),
+}
+
+
+# Helper to parse a string into an expansion string tuple, caching
+# the results so that future parse requests don't need to think about
+# the string
+def _parse_expstr(instr):
+ try:
+ return PARSE_CACHE[instr]
+ except KeyError:
+ # This use of the regex turns a string like "foo %{bar} baz" into
+ # a list ["foo ", "bar", " baz"]
+ splits = PARSE_EXPANSION.split(instr)
+ # If an expansion ends the string, we get an empty string on the end
+ # which we can optimise away, making the expansion routines not need
+ # a test for this.
+ if splits[-1] == '':
+ splits = splits[:-1]
+ # Cache an interned copy of this. We intern it to try and reduce the
+ # memory impact of the cache. It seems odd to cache the list length
+ # but this is measurably cheaper than calculating it each time during
+ # string expansion.
+ PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits])
+ return PARSE_CACHE[instr]
+
+
+# Helper to expand a given top level expansion string tuple in the context
+# of the given dictionary of expansion strings.
+#
+# Note: Will raise KeyError if any expansion is missing
+def _expand_expstr(content, topvalue):
+ # Short-circuit constant strings
+ if topvalue[0] == 1:
+ return topvalue[1][0]
+
+ # Short-circuit strings which are entirely an expansion of another variable
+ # e.g. "%{another}"
+ if topvalue[0] == 2 and topvalue[1][0] == "":
+ return _expand_expstr(content, content[topvalue[1][1]])
+
+ # Otherwise process fully...
+ def internal_expand(value):
+ (expansion_len, expansion_bits) = value
+ idx = 0
+ while idx < expansion_len:
+ # First yield any constant string content
+ yield expansion_bits[idx]
+ idx += 1
+ # Now, if there is an expansion variable left to expand, yield
+ # the expansion of that variable too
+ if idx < expansion_len:
+ yield from internal_expand(content[expansion_bits[idx]])
+ idx += 1
+
+ return "".join(internal_expand(topvalue))
diff --git a/buildstream/element.py b/buildstream/element.py
index 5ecc25ab2..b3f4d5518 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -894,10 +894,7 @@ class Element(Plugin):
(str): The resolved value for *varname*, or None if no
variable was declared with the given name.
"""
- if varname in self.__variables.variables:
- return self.__variables.variables[varname]
-
- return None
+ return self.__variables.flat.get(varname)
def batch_prepare_assemble(self, flags, *, collect=None):
""" Configure command batching across prepare() and assemble()