summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Silverstone <daniel.silverstone@codethink.co.uk>2019-02-14 10:50:08 +0000
committerDaniel Silverstone <daniel.silverstone@codethink.co.uk>2019-02-14 17:02:04 +0000
commitc7f71451bff4b0cc3fd1404783c54cc55974a625 (patch)
treef6891017d2ab5f440adea7457aafa58544a0dd3d
parent56c07baad4cdd79e2fbab1fda856107ad6e3bfb7 (diff)
downloadbuildstream-danielsilverstone-ct/variables-rework.tar.gz
Variables: Rework how expansion strings workdanielsilverstone-ct/variables-rework
Rather than constantly using regular expressions and retrieval from YAML nodes, pre-parse expansion strings into a list representation cached for reuse, and then expand them as simple string concatenation. Signed-off-by: Daniel Silverstone <daniel.silverstone@codethink.co.uk>
-rw-r--r--buildstream/_frontend/widget.py2
-rw-r--r--buildstream/_variables.py282
-rw-r--r--buildstream/element.py5
3 files changed, 156 insertions, 133 deletions
diff --git a/buildstream/_frontend/widget.py b/buildstream/_frontend/widget.py
index b8e3d920a..2920d657d 100644
--- a/buildstream/_frontend/widget.py
+++ b/buildstream/_frontend/widget.py
@@ -398,7 +398,7 @@ class LogLine(Widget):
# Variables
if "%{vars" in format_:
- variables = _yaml.node_sanitize(element._Element__variables.variables)
+ variables = _yaml.node_sanitize(element._Element__variables.flat)
line = p.fmt_subst(
line, 'vars',
yaml.round_trip_dump(variables, default_flow_style=False, allow_unicode=True))
diff --git a/buildstream/_variables.py b/buildstream/_variables.py
index 95b80cc08..3678be6d0 100644
--- a/buildstream/_variables.py
+++ b/buildstream/_variables.py
@@ -1,5 +1,6 @@
#
# Copyright (C) 2016 Codethink Limited
+# Copyright (C) 2019 Bloomberg L.P.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -16,15 +17,17 @@
#
# Authors:
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
+# Daniel Silverstone <daniel.silverstone@codethink.co.uk>
import re
+import sys
from ._exceptions import LoadError, LoadErrorReason
from . import _yaml
# Variables are allowed to have dashes here
#
-_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
+PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
# The Variables helper object will resolve the variable references in
@@ -38,14 +41,15 @@ _VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
# node (dict): A node loaded and composited with yaml tools
#
# Raises:
-# LoadError, if unresolved variables occur.
+# LoadError, if unresolved variables, or cycles in resolution, occur.
#
class Variables():
def __init__(self, node):
self.original = node
- self.variables = self._resolve(node)
+ self.newexp = self._resolve(node)
+ self.flat = self._flatten()
# subst():
#
@@ -61,139 +65,161 @@ class Variables():
# LoadError, if the string contains unresolved variable references.
#
def subst(self, string):
- substitute, unmatched, _ = self._subst(string, self.variables)
- unmatched = list(set(unmatched))
- if unmatched:
- if len(unmatched) == 1:
- message = "Unresolved variable '{var}'".format(var=unmatched[0])
- else:
- message = "Unresolved variables: "
- for unmatch in unmatched:
- if unmatched.index(unmatch) > 0:
- message += ', '
- message += unmatch
-
- raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
-
- return substitute
-
- def _subst(self, string, variables):
-
- def subst_callback(match):
- nonlocal variables
- nonlocal unmatched
- nonlocal matched
-
- token = match.group(0)
- varname = match.group(1)
-
- value = _yaml.node_get(variables, str, varname, default_value=None)
- if value is not None:
- # We have to check if the inner string has variables
- # and return unmatches for those
- unmatched += re.findall(_VARIABLE_MATCH, value)
- matched += [varname]
- else:
- # Return unmodified token
- unmatched += [varname]
- value = token
-
- return value
-
- matched = []
- unmatched = []
- replacement = re.sub(_VARIABLE_MATCH, subst_callback, string)
-
- return (replacement, unmatched, matched)
+ exp = _parse_expstr(string)
+
+ try:
+ return _expand_expstr(self.newexp, exp)
+ except KeyError:
+ unmatched = []
+
+ for v in exp[1][1::2]:
+ if v not in self.newexp:
+ unmatched.append(v)
+
+ if unmatched:
+ if len(unmatched) == 1:
+ message = "Unresolved variable '{var}'".format(var=unmatched[0])
+ else:
+ message = "Unresolved variables: "
+ for unmatch in unmatched:
+ if unmatched.index(unmatch) > 0:
+ message += ', '
+ message += unmatch
+
+ raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
+ raise
# Variable resolving code
#
- # Here we substitute variables for values (resolve variables) repeatedly
- # in a dictionary, each time creating a new dictionary until there is no
- # more unresolved variables to resolve, or, until resolving further no
- # longer resolves anything, in which case we throw an exception.
+ # Here we resolve all of our inputs into a dictionary, ready for use
+ # in subst()
def _resolve(self, node):
- variables = node
-
# Special case, if notparallel is specified in the variables for this
# element, then override max-jobs to be 1.
# Initialize it as a string as all variables are processed as strings.
#
- if _yaml.node_get(variables, bool, 'notparallel', default_value=False):
- variables['max-jobs'] = str(1)
-
- # Resolve the dictionary once, reporting the new dictionary with things
- # substituted in it, and reporting unmatched tokens.
- #
- def resolve_one(variables):
- unmatched = []
- resolved = {}
-
- for key, value in _yaml.node_items(variables):
-
- # Ensure stringness of the value before substitution
- value = _yaml.node_get(variables, str, key)
-
- resolved_var, item_unmatched, matched = self._subst(value, variables)
-
- if _wrap_variable(key) in resolved_var:
- referenced_through = find_recursive_variable(key, matched, variables)
+ if _yaml.node_get(node, bool, 'notparallel', default_value=False):
+ node['max-jobs'] = str(1)
+
+ ret = {}
+ for key, value in _yaml.node_items(node):
+ value = _yaml.node_get(node, str, key)
+ ret[sys.intern(key)] = _parse_expstr(value)
+ return ret
+
+ def _check_for_missing(self):
+ # First the check for anything unresolvable
+ summary = []
+ for key, expstr in self.newexp.items():
+ for var in expstr[1][1::2]:
+ if var not in self.newexp:
+ line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
+ provenance = _yaml.node_get_provenance(self.original, key)
+ summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
+ if summary:
+ raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
+ "Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)))
+
+ def _check_for_cycles(self):
+ # And now the cycle checks
+ def cycle_check(exp, visited, cleared):
+ for var in exp[1][1::2]:
+ if var in cleared:
+ continue
+ if var in visited:
raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE,
- "{}: ".format(_yaml.node_get_provenance(variables, key)) +
+ "{}: ".format(_yaml.node_get_provenance(self.original, var)) +
("Variable '{}' expands to contain a reference to itself. " +
- "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key)))
-
- resolved[key] = resolved_var
- unmatched += item_unmatched
-
- # Carry over provenance
- resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY]
- return (resolved, unmatched)
-
- # Resolve it until it's resolved or broken
- #
- resolved = variables
- unmatched = ['dummy']
- last_unmatched = ['dummy']
- while unmatched:
- resolved, unmatched = resolve_one(resolved)
-
- # Lists of strings can be compared like this
- if unmatched == last_unmatched:
- # We've got the same result twice without matching everything,
- # something is undeclared or cyclic, compose a summary.
- #
- summary = ''
- for unmatch in set(unmatched):
- for var, provenance in self._find_references(unmatch):
- line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n"
- summary += line.format(unmatched=unmatch, variable=var, provenance=provenance)
-
- raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
- "Failed to resolve one or more variable:\n{}".format(summary))
-
- last_unmatched = unmatched
-
- return resolved
-
- # Helper function to fetch information about the node referring to a variable
+ "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var))
+ visited.append(var)
+ cycle_check(self.newexp[var], visited, cleared)
+ visited.pop()
+ cleared.add(var)
+
+ cleared = set()
+ for key, expstr in self.newexp.items():
+ if key not in cleared:
+ cycle_check(expstr, [key], cleared)
+
+ # _flatten():
#
- def _find_references(self, varname):
- fullname = _wrap_variable(varname)
- for key, value in _yaml.node_items(self.original):
- if fullname in value:
- provenance = _yaml.node_get_provenance(self.original, key)
- yield (key, provenance)
-
-
-def find_recursive_variable(variable, matched_variables, all_vars):
- matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables)
- for key, value in zip(matched_variables, matched_values):
- if _wrap_variable(variable) in value:
- return key
- # We failed to find a recursive variable
- return None
-
-
-def _wrap_variable(var):
- return "%{" + var + "}"
+ # Turn our dictionary of expansion strings into a flattened set
+ # so that we can run expansions faster in the future
+ #
+ # Raises:
+ # LoadError, if the string contains unresolved variable references or
+ # if cycles are detected in the variable references
+ #
+ def _flatten(self):
+ flat = {}
+ try:
+ for key, expstr in self.newexp.items():
+ if expstr[0] > 1:
+ expstr = (1, [sys.intern(_expand_expstr(self.newexp, expstr))])
+ self.newexp[key] = expstr
+ flat[key] = expstr[1][0]
+ except KeyError:
+ self._check_for_missing()
+ raise
+ except RecursionError:
+ self._check_for_cycles()
+ raise
+ return flat
+
+
+# Cache for the parsed expansion strings. While this is nominally
+# something which might "waste" memory, in reality each of these
+# will live as long as the element which uses it, which is the
+# vast majority of the memory usage across the execution of BuildStream.
+PARSE_CACHE = {
+ # Prime the cache with the empty string since otherwise that can
+ # cause issues with the parser, complications to which cause slowdown
+ "": (1, [""]),
+}
+
+
+# Helper to parse a string into an expansion string tuple, caching
+# the results so that future parse requests don't need to think about
+# the string
+def _parse_expstr(instr):
+ try:
+ return PARSE_CACHE[instr]
+ except KeyError:
+ # This use of the regex turns a string like "foo %{bar} baz" into
+ # a list ["foo ", "bar", " baz"]
+ splits = PARSE_EXPANSION.split(instr)
+ # If an expansion ends the string, we get an empty string on the end
+ # which we can optimise away, making the expansion routines not need
+ # a test for this.
+ if splits[-1] == '':
+ splits = splits[:-1]
+ # Cache an interned copy of this. We intern it to try and reduce the
+ # memory impact of the cache. It seems odd to cache the list length
+ # but this is measurably cheaper than calculating it each time during
+ # string expansion.
+ PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits])
+ return PARSE_CACHE[instr]
+
+
+# Helper to expand a given top level expansion string tuple in the context
+# of the given dictionary of expansion strings.
+#
+# Note: Will raise KeyError if any expansion is missing
+def _expand_expstr(content, topvalue):
+ if topvalue[0] == 1:
+ return topvalue[1][0]
+
+ def __expand(value):
+ (expansion_len, expansion_bits) = value
+ idx = 0
+ while idx < expansion_len:
+ # First yield any constant string content
+ yield expansion_bits[idx]
+ idx += 1
+ # Now, if there is an expansion variable left to expand, yield
+ # the expansion of that variable too
+ if idx < expansion_len:
+ yield from __expand(content[expansion_bits[idx]])
+ idx += 1
+
+ return "".join(__expand(topvalue))
diff --git a/buildstream/element.py b/buildstream/element.py
index 5ecc25ab2..b3f4d5518 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -894,10 +894,7 @@ class Element(Plugin):
(str): The resolved value for *varname*, or None if no
variable was declared with the given name.
"""
- if varname in self.__variables.variables:
- return self.__variables.variables[varname]
-
- return None
+ return self.__variables.flat.get(varname)
def batch_prepare_assemble(self, flags, *, collect=None):
""" Configure command batching across prepare() and assemble()