diff options
author | Valentin David <valentin.david@codethink.co.uk> | 2020-05-28 15:34:15 +0200 |
---|---|---|
committer | Valentin David <valentin.david@codethink.co.uk> | 2020-06-03 10:35:18 +0000 |
commit | 8ae5e115a4693ff33248278f5702614ff6f3abee (patch) | |
tree | 42fb0714c0ecdffd800d1a9b35b77c1cc9c7bad2 | |
parent | 76c51c2d1f00f171c1cf6e517cc4d7e43c740d28 (diff) | |
download | buildstream-8ae5e115a4693ff33248278f5702614ff6f3abee.tar.gz |
Optimize variable flattening
Verifying for variables (all used variables defined and no cycle) is
much faster than flattening them. It also uses much less memory. Only
`bst show -f "%{vars}"` requires to flatten all variables. For other
cases we can improve performance by only checking rather than
flattening.
Also flattening very nested variables could lead to very long lists of
empty strings. The memory usage for flattening could be a lot bigger
than the resulting value. Force flattening and caching every
variable definitions can improve intermediate memory consumption and
only consume memory of the size of the result.
Finally, an optimization allowed inifinite cycles leading to
segmentation fault. This was solved by simplifying the code.
-rw-r--r-- | src/buildstream/_frontend/widget.py | 2 | ||||
-rw-r--r-- | src/buildstream/_variables.pyx | 156 | ||||
-rw-r--r-- | src/buildstream/element.py | 2 | ||||
-rw-r--r-- | tests/format/variables.py | 9 | ||||
-rw-r--r-- | tests/format/variables/cyclic_variables/simple-cyclic.bst | 5 |
5 files changed, 109 insertions, 65 deletions
diff --git a/src/buildstream/_frontend/widget.py b/src/buildstream/_frontend/widget.py index 81ca2f7b5..f07e3dba0 100644 --- a/src/buildstream/_frontend/widget.py +++ b/src/buildstream/_frontend/widget.py @@ -372,7 +372,7 @@ class LogLine(Widget): # Variables if "%{vars" in format_: - variables = element._Element__variables.flat + variables = dict(element._Element__variables) line = p.fmt_subst( line, "vars", yaml.round_trip_dump(variables, default_flow_style=False, allow_unicode=True) ) diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx index 9dc6cf623..e3fcfc3a0 100644 --- a/src/buildstream/_variables.pyx +++ b/src/buildstream/_variables.pyx @@ -68,12 +68,44 @@ cdef class Variables: cdef MappingNode original cdef dict _expstr_map - cdef public dict flat def __init__(self, MappingNode node): self.original = node self._expstr_map = self._resolve(node) - self.flat = self._flatten() + self._check_for_missing() + self._check_for_cycles() + + def __getitem__(self, str name): + return _expand_var(self._expstr_map, name) + + def __contains__(self, str name): + return name in self._expstr_map + + # __iter__() + # + # Provide an iterator for all variables effective values + # + # Returns: + # (Iterator[Tuple[str, str]]) + # + def __iter__(self): + return _VariablesIterator(self._expstr_map) + + # get() + # + # Expand definition of variable by name. If the variable is not + # defined, it will return None instead of failing. + # + # Args: + # name (str): Name of the variable to expand + # + # Returns: + # (str|None): The expanded value for the variable or None variable was not defined. + # + cpdef str get(self, str name): + if name not in self._expstr_map: + return None + return _expand_var(self._expstr_map, name) # expand() # @@ -188,36 +220,6 @@ cdef class Variables: if key not in cleared: cycle_check(expstr, [key], cleared) - # _flatten(): - # - # Turn our dictionary of expansion strings into a flattened dict - # so that we can run expansions faster in the future - # - # Raises: - # LoadError, if the string contains unresolved variable references or - # if cycles are detected in the variable references - # - cdef dict _flatten(self): - cdef dict flat = {} - cdef str key - cdef list expstr - - try: - for key, expstr in self._expstr_map.items(): - if len(expstr) > 1: - # FIXME: do we really gain anything by interning? - expstr = [sys.intern(_expand_expstr(self._expstr_map, expstr))] - self._expstr_map[key] = expstr - flat[key] = expstr[0] - except KeyError: - self._check_for_missing() - raise - except RecursionError: - self._check_for_cycles() - raise - return flat - - # Cache for the parsed expansion strings. While this is nominally # something which might "waste" memory, in reality each of these # will live as long as the element which uses it, which is the @@ -255,51 +257,79 @@ cdef list _parse_expstr(str instr): return ret -# Helper to expand recursively an expansion string in the context -# of the given dictionary of expansion string +# Helper to expand and cache a variable definition in the context of +# the given dictionary of expansion strings. # # Args: -# content (dict): dictionary context for resolving the variables -# value (list): expansion string to expand -# acc (list): list in which to add the result -# counter (int): counter to count the number of recursion in order to -# detect cycles. +# content (dict): Dictionary of expansion strings +# name (str): Name of the variable to expand +# counter (int): Recursion counter +# +# Returns: +# (str): The expanded value of variable # # Raises: -# KeyError: if any expansion is missing -# RecursionError: if a variable is defined recursively +# KeyError, if any expansion is missing +# RecursionError, if recursion required for evaluation is too deep # -cdef void _expand_expstr_helper(dict content, list value, list acc, int counter = 0) except *: - cdef Py_ssize_t idx = 0 - cdef Py_ssize_t value_len = len(value) +cdef str _expand_var(dict content, str name, int counter = 0): + cdef str sub + + if len(content[name]) > 1: + sub = _expand_expstr(content, <list> content[name], counter) + content[name] = [sys.intern(sub)] + return content[name][0] + + +# Helper to expand a given top level expansion string tuple in the context +# of the given dictionary of expansion strings. +# +# Args: +# content (dict): Dictionary of expansion strings +# name (str): Name of the variable to expand +# counter (int): Recursion counter +# +# Returns: +# (str): The expanded value of variable +# +# Raises: +# KeyError, if any expansion is missing +# RecursionError, if recursion required for evaluation is too deep +# +cdef str _expand_expstr(dict content, list value, int counter = 0): if counter > 1000: raise RecursionError() + cdef Py_ssize_t idx = 0 + cdef Py_ssize_t value_len = len(value) + cdef str sub + cdef list acc = [] + while idx < value_len: acc.append(value[idx]) idx += 1 if idx < value_len: - _expand_expstr_helper(content, <list> content[value[idx]], acc, counter + 1) - + acc.append(_expand_var(content, <str> value[idx], counter + 1)) idx += 1 + return "".join(acc) -# Helper to expand a given top level expansion string tuple in the context -# of the given dictionary of expansion strings. -# -# Note: Will raise KeyError if any expansion is missing -cdef str _expand_expstr(dict content, list topvalue): - # Short-circuit constant strings - if len(topvalue) == 1: - return <str> topvalue[0] - - # Short-circuit strings which are entirely an expansion of another variable - # e.g. "%{another}" - if len(topvalue) == 2 and len(<str> topvalue[0]) == 0: - return _expand_expstr(content, <list> content[topvalue[1]]) - - cdef list result = [] - _expand_expstr_helper(content, topvalue, result) - return "".join(result) + +# Iterator for all flatten variables. +# Used by Variables.__iter__ +cdef class _VariablesIterator: + cdef dict _expstr_map + cdef object _iter + + def __init__(self, dict expstr_map): + self._expstr_map = expstr_map + self._iter = iter(expstr_map) + + def __iter__(self): + return self + + def __next__(self): + name = next(self._iter) + return name, _expand_var(self._expstr_map, name) diff --git a/src/buildstream/element.py b/src/buildstream/element.py index 09b0e4c06..917ad9ba9 100644 --- a/src/buildstream/element.py +++ b/src/buildstream/element.py @@ -852,7 +852,7 @@ class Element(Plugin): variable was declared with the given name. """ # Flat is not recognized correctly by Pylint as being a dictionary - return self.__variables.flat.get(varname) # pylint: disable=no-member + return self.__variables.get(varname) # pylint: disable=no-member def batch_prepare_assemble(self, flags: int, *, collect: Optional[str] = None) -> None: """ Configure command batching across prepare() and assemble() diff --git a/tests/format/variables.py b/tests/format/variables.py index 616dc20c1..c5e8eebad 100644 --- a/tests/format/variables.py +++ b/tests/format/variables.py @@ -63,6 +63,15 @@ def test_missing_variable(cli, datafiles, element): @pytest.mark.timeout(15, method="signal") @pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables")) +def test_simple_cyclic_variables(cli, datafiles): + print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence") + project = str(datafiles) + result = cli.run(project=project, silent=True, args=["build", "simple-cyclic.bst"]) + result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE) + + +@pytest.mark.timeout(15, method="signal") +@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables")) def test_cyclic_variables(cli, datafiles): print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence") project = str(datafiles) diff --git a/tests/format/variables/cyclic_variables/simple-cyclic.bst b/tests/format/variables/cyclic_variables/simple-cyclic.bst new file mode 100644 index 000000000..806e1f390 --- /dev/null +++ b/tests/format/variables/cyclic_variables/simple-cyclic.bst @@ -0,0 +1,5 @@ +kind: manual + +variables: + a: "%{b}" + b: "%{a}" |