summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbst-marge-bot <marge-bot@buildstream.build>2019-07-26 10:03:54 +0000
committerbst-marge-bot <marge-bot@buildstream.build>2019-07-26 10:03:54 +0000
commit737fbaca671140a1e1a6eb8bbcb01ed9bbc3fb21 (patch)
treed6dfede82d0983975e4030e8f8efc7b012f8b799
parent41b4eb57df05f52140c68c27b5e6dd3d1b461d32 (diff)
parent862e9c3f60b46b7f80a223dbff0b95cf5423a197 (diff)
downloadbuildstream-737fbaca671140a1e1a6eb8bbcb01ed9bbc3fb21.tar.gz
Merge branch 'bschubert/optimize-loader' into 'master'
Optimize loader See merge request BuildStream/buildstream!1493
-rw-r--r--.pylintrc1
-rwxr-xr-xsetup.py1
-rw-r--r--src/buildstream/_loader/loadelement.py132
-rw-r--r--src/buildstream/_loader/loadelement.pyx228
-rw-r--r--src/buildstream/_loader/loader.py95
5 files changed, 246 insertions, 211 deletions
diff --git a/.pylintrc b/.pylintrc
index e07219a41..cb4967a2f 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -6,6 +6,7 @@
extension-pkg-whitelist=
buildstream.node,
buildstream._loader._loader,
+ buildstream._loader.loadelement,
buildstream._loader.types,
buildstream._utils,
buildstream._variables,
diff --git a/setup.py b/setup.py
index 284e74d7f..141fb0539 100755
--- a/setup.py
+++ b/setup.py
@@ -404,6 +404,7 @@ BUILD_EXTENSIONS = []
register_cython_module("buildstream.node")
register_cython_module("buildstream._loader._loader")
+register_cython_module("buildstream._loader.loadelement")
register_cython_module("buildstream._loader.types", dependencies=["buildstream.node"])
register_cython_module("buildstream._yaml", dependencies=["buildstream.node"])
register_cython_module("buildstream._utils")
diff --git a/src/buildstream/_loader/loadelement.py b/src/buildstream/_loader/loadelement.py
deleted file mode 100644
index 773675e2b..000000000
--- a/src/buildstream/_loader/loadelement.py
+++ /dev/null
@@ -1,132 +0,0 @@
-#
-# Copyright (C) 2016 Codethink Limited
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library. If not, see <http://www.gnu.org/licenses/>.
-#
-# Authors:
-# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
-
-# System imports
-from itertools import count
-
-from pyroaring import BitMap, FrozenBitMap # pylint: disable=no-name-in-module
-
-
-# LoadElement():
-#
-# A transient object breaking down what is loaded allowing us to
-# do complex operations in multiple passes.
-#
-# Args:
-# node (dict): A YAML loaded dictionary
-# name (str): The element name
-# loader (Loader): The Loader object for this element
-#
-class LoadElement():
- # Dependency():
- #
- # A link from a LoadElement to its dependencies.
- #
- # Keeps a link to one of the current Element's dependencies, together with
- # its dependency type.
- #
- # Args:
- # element (LoadElement): a LoadElement on which there is a dependency
- # dep_type (str): the type of dependency this dependency link is
- class Dependency:
- def __init__(self, element, dep_type):
- self.element = element
- self.dep_type = dep_type
-
- _counter = count()
-
- def __init__(self, node, filename, loader):
-
- #
- # Public members
- #
- self.node = node # The YAML node
- self.name = filename # The element name
- self.full_name = None # The element full name (with associated junction)
- self.meta_done = False # If the MetaElement for this LoadElement is done
- self.node_id = next(self._counter)
-
- #
- # Private members
- #
- self._loader = loader # The Loader object
- self._dep_cache = None # The dependency cache, to speed up depends()
-
- #
- # Initialization
- #
- if loader.project.junction:
- # dependency is in subproject, qualify name
- self.full_name = '{}:{}'.format(loader.project.junction.name, self.name)
- else:
- # dependency is in top-level project
- self.full_name = self.name
-
- # Ensure the root node is valid
- self.node.validate_keys([
- 'kind', 'depends', 'sources', 'sandbox',
- 'variables', 'environment', 'environment-nocache',
- 'config', 'public', 'description',
- 'build-depends', 'runtime-depends',
- ])
-
- self.dependencies = []
-
- @property
- def junction(self):
- return self._loader.project.junction
-
- # depends():
- #
- # Checks if this element depends on another element, directly
- # or indirectly.
- #
- # Args:
- # other (LoadElement): Another LoadElement
- #
- # Returns:
- # (bool): True if this LoadElement depends on 'other'
- #
- def depends(self, other):
- self._ensure_depends_cache()
- return other.node_id in self._dep_cache
-
- ###########################################
- # Private Methods #
- ###########################################
- def _ensure_depends_cache(self):
-
- if self._dep_cache:
- return
-
- self._dep_cache = BitMap()
-
- for dep in self.dependencies:
- elt = dep.element
-
- # Ensure the cache of the element we depend on
- elt._ensure_depends_cache()
-
- # We depend on this element
- self._dep_cache.add(elt.node_id)
-
- # And we depend on everything this element depends on
- self._dep_cache.update(elt._dep_cache)
-
- self._dep_cache = FrozenBitMap(self._dep_cache)
diff --git a/src/buildstream/_loader/loadelement.pyx b/src/buildstream/_loader/loadelement.pyx
new file mode 100644
index 000000000..867de6f29
--- /dev/null
+++ b/src/buildstream/_loader/loadelement.pyx
@@ -0,0 +1,228 @@
+#
+# Copyright (C) 2016 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
+
+from functools import cmp_to_key
+
+from pyroaring import BitMap, FrozenBitMap # pylint: disable=no-name-in-module
+
+from ..node cimport MappingNode
+from .types import Symbol
+
+
+# Counter to get ids to LoadElements
+cdef int _counter = 0
+
+cdef int _next_synthetic_counter():
+ global _counter
+ _counter += 1
+ return _counter
+
+
+# Dependency():
+#
+# A link from a LoadElement to its dependencies.
+#
+# Keeps a link to one of the current Element's dependencies, together with
+# its dependency type.
+#
+# Args:
+# element (LoadElement): a LoadElement on which there is a dependency
+# dep_type (str): the type of dependency this dependency link is
+cdef class Dependency:
+ cdef readonly LoadElement element
+ cdef readonly str dep_type
+
+ def __cinit__(self, LoadElement element, str dep_type):
+ self.element = element
+ self.dep_type = dep_type
+
+
+# LoadElement():
+#
+# A transient object breaking down what is loaded allowing us to
+# do complex operations in multiple passes.
+#
+# Args:
+# node (dict): A YAML loaded dictionary
+# name (str): The element name
+# loader (Loader): The Loader object for this element
+#
+cdef class LoadElement:
+
+ cdef readonly MappingNode node
+ cdef readonly str name
+ cdef readonly full_name
+ cdef public bint meta_done
+ cdef int node_id
+ cdef readonly object _loader
+ # TODO: if/when pyroaring exports symbols, we could type this statically
+ cdef object _dep_cache
+ cdef readonly list dependencies
+
+ def __cinit__(self, MappingNode node, str filename, object loader):
+
+ #
+ # Public members
+ #
+ self.node = node # The YAML node
+ self.name = filename # The element name
+ self.full_name = None # The element full name (with associated junction)
+ self.meta_done = False # If the MetaElement for this LoadElement is done
+ self.node_id = _next_synthetic_counter()
+
+ #
+ # Private members
+ #
+ self._loader = loader # The Loader object
+ self._dep_cache = None # The dependency cache, to speed up depends()
+
+ #
+ # Initialization
+ #
+ if loader.project.junction:
+ # dependency is in subproject, qualify name
+ self.full_name = '{}:{}'.format(loader.project.junction.name, self.name)
+ else:
+ # dependency is in top-level project
+ self.full_name = self.name
+
+ # Ensure the root node is valid
+ self.node.validate_keys([
+ 'kind', 'depends', 'sources', 'sandbox',
+ 'variables', 'environment', 'environment-nocache',
+ 'config', 'public', 'description',
+ 'build-depends', 'runtime-depends',
+ ])
+
+ self.dependencies = []
+
+ @property
+ def junction(self):
+ return self._loader.project.junction
+
+ # depends():
+ #
+ # Checks if this element depends on another element, directly
+ # or indirectly.
+ #
+ # Args:
+ # other (LoadElement): Another LoadElement
+ #
+ # Returns:
+ # (bool): True if this LoadElement depends on 'other'
+ #
+ def depends(self, LoadElement other not None):
+ self._ensure_depends_cache()
+ return other.node_id in self._dep_cache
+
+ ###########################################
+ # Private Methods #
+ ###########################################
+ cdef void _ensure_depends_cache(self):
+ cdef Dependency dep
+
+ if self._dep_cache:
+ return
+
+ self._dep_cache = BitMap()
+
+ for dep in self.dependencies:
+ elt = dep.element
+
+ # Ensure the cache of the element we depend on
+ elt._ensure_depends_cache()
+
+ # We depend on this element
+ self._dep_cache.add(elt.node_id)
+
+ # And we depend on everything this element depends on
+ self._dep_cache.update(elt._dep_cache)
+
+ self._dep_cache = FrozenBitMap(self._dep_cache)
+
+
+def _dependency_cmp(Dependency dep_a, Dependency dep_b):
+ cdef LoadElement element_a = dep_a.element
+ cdef LoadElement element_b = dep_b.element
+
+ # Sort on inter element dependency first
+ if element_a.depends(element_b):
+ return 1
+ elif element_b.depends(element_a):
+ return -1
+
+ # If there are no inter element dependencies, place
+ # runtime only dependencies last
+ if dep_a.dep_type != dep_b.dep_type:
+ if dep_a.dep_type == Symbol.RUNTIME:
+ return 1
+ elif dep_b.dep_type == Symbol.RUNTIME:
+ return -1
+
+ # All things being equal, string comparison.
+ if element_a.name > element_b.name:
+ return 1
+ elif element_a.name < element_b.name:
+ return -1
+
+ # Sort local elements before junction elements
+ # and use string comparison between junction elements
+ if element_a.junction and element_b.junction:
+ if element_a.junction > element_b.junction:
+ return 1
+ elif element_a.junction < element_b.junction:
+ return -1
+ elif element_a.junction:
+ return -1
+ elif element_b.junction:
+ return 1
+
+ # This wont ever happen
+ return 0
+
+
+# sort_dependencies():
+#
+# Sort dependencies of each element by their dependencies,
+# so that direct dependencies which depend on other direct
+# dependencies (directly or indirectly) appear later in the
+# list.
+#
+# This avoids the need for performing multiple topological
+# sorts throughout the build process.
+#
+# Args:
+# element (LoadElement): The element to sort
+#
+def sort_dependencies(LoadElement element):
+ cdef list working_elements = [element]
+ cdef set visited = set(working_elements)
+ cdef Dependency dep
+
+ # Now dependency sort, we ensure that if any direct dependency
+ # directly or indirectly depends on another direct dependency,
+ # it is found later in the list.
+ while working_elements:
+ element = working_elements.pop()
+ for dep in element.dependencies:
+ if dep.element not in visited:
+ visited.add(dep.element)
+ working_elements.append(dep.element)
+
+ element.dependencies.sort(key=cmp_to_key(_dependency_cmp))
diff --git a/src/buildstream/_loader/loader.py b/src/buildstream/_loader/loader.py
index cdfc1dd56..6108f10b8 100644
--- a/src/buildstream/_loader/loader.py
+++ b/src/buildstream/_loader/loader.py
@@ -18,7 +18,6 @@
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
import os
-from functools import cmp_to_key
from .._exceptions import LoadError, LoadErrorReason
from .. import Consistency
@@ -30,7 +29,8 @@ from .._includes import Includes
from ._loader import valid_chars_name
from .types import Symbol, extract_depends_from_node
-from .loadelement import LoadElement
+from . import loadelement
+from .loadelement import Dependency, LoadElement
from .metaelement import MetaElement
from .metasource import MetaSource
from ..types import CoreWarnings
@@ -121,8 +121,9 @@ class Loader():
# Set up a dummy element that depends on all top-level targets
# to resolve potential circular dependencies between them
dummy_target = LoadElement(Node.from_dict({}), "", self)
- dummy_target.dependencies.extend(
- LoadElement.Dependency(element, Symbol.RUNTIME)
+ # Pylint is not very happy with Cython and can't understand 'dependencies' is a list
+ dummy_target.dependencies.extend( # pylint: disable=no-member
+ Dependency(element, Symbol.RUNTIME)
for element in target_elements
)
@@ -136,7 +137,7 @@ class Loader():
for element in target_elements:
loader = element._loader
with PROFILER.profile(Topics.SORT_DEPENDENCIES, element.name):
- loader._sort_dependencies(element)
+ loadelement.sort_dependencies(element)
# Finally, wrap what we have into LoadElements and return the target
#
@@ -169,6 +170,11 @@ class Loader():
#
del state['_fetch_subprojects']
+ # Also there's no gain in pickling over the caches, and they might
+ # contain things which are unpleasantly large or unable to pickle.
+ del state['_elements']
+ del state['_meta_elements']
+
return state
# clean_caches()
@@ -329,13 +335,15 @@ class Loader():
dep_deps = extract_depends_from_node(dep_element.node)
loader_queue.append((dep_element, list(reversed(dep_deps)), []))
- if dep_element.node.get_str(Symbol.KIND) == 'junction':
+ # Pylint is not very happy about Cython and can't understand 'node' is a 'MappingNode'
+ if dep_element.node.get_str(Symbol.KIND) == 'junction': # pylint: disable=no-member
raise LoadError("{}: Cannot depend on junction" .format(dep.provenance),
LoadErrorReason.INVALID_DATA)
# All is well, push the dependency onto the LoadElement
- current_element[0].dependencies.append(
- LoadElement.Dependency(dep_element, dep.dep_type))
+ # Pylint is not very happy with Cython and can't understand 'dependencies' is a list
+ current_element[0].dependencies.append( # pylint: disable=no-member
+ Dependency(dep_element, dep.dep_type))
else:
# We do not have any more dependencies to load for this
# element on the queue, report any invalid dep names
@@ -397,77 +405,6 @@ class Loader():
check_elements.remove(this_element)
validated.add(this_element)
- # _sort_dependencies():
- #
- # Sort dependencies of each element by their dependencies,
- # so that direct dependencies which depend on other direct
- # dependencies (directly or indirectly) appear later in the
- # list.
- #
- # This avoids the need for performing multiple topological
- # sorts throughout the build process.
- #
- # Args:
- # element (LoadElement): The element to sort
- #
- @staticmethod
- def _sort_dependencies(element):
-
- working_elements = [element]
- visited = set(working_elements)
-
- def dependency_cmp(dep_a, dep_b):
- element_a = dep_a.element
- element_b = dep_b.element
-
- # Sort on inter element dependency first
- if element_a.depends(element_b):
- return 1
- elif element_b.depends(element_a):
- return -1
-
- # If there are no inter element dependencies, place
- # runtime only dependencies last
- if dep_a.dep_type != dep_b.dep_type:
- if dep_a.dep_type == Symbol.RUNTIME:
- return 1
- elif dep_b.dep_type == Symbol.RUNTIME:
- return -1
-
- # All things being equal, string comparison.
- if element_a.name > element_b.name:
- return 1
- elif element_a.name < element_b.name:
- return -1
-
- # Sort local elements before junction elements
- # and use string comparison between junction elements
- if element_a.junction and element_b.junction:
- if element_a.junction > element_b.junction:
- return 1
- elif element_a.junction < element_b.junction:
- return -1
- elif element_a.junction:
- return -1
- elif element_b.junction:
- return 1
-
- # This wont ever happen
- return 0
-
- # Now dependency sort, we ensure that if any direct dependency
- # directly or indirectly depends on another direct dependency,
- # it is found later in the list.
- while working_elements:
- element = working_elements.pop()
- for dep in element.dependencies:
- dep_element = dep.element
- if dep_element not in visited:
- visited.add(dep_element)
- working_elements.append(dep_element)
-
- element.dependencies.sort(key=cmp_to_key(dependency_cmp))
-
# _collect_element_no_deps()
#
# Collect a single element, without its dependencies, into a meta_element