author    bst-marge-bot <marge-bot@buildstream.build>  2019-05-30 09:34:49 +0000
committer bst-marge-bot <marge-bot@buildstream.build>  2019-05-30 09:34:49 +0000
commit    8b302c55d2da4f7ab29e346496a8e2fe3d7a7660 (patch)
tree      8e46707ab3af4ecca682bfa754ea4d5233921690
parent    4b450f664b1bdd0f342f36453f8e46edbd0c887a (diff)
parent    32ee4163bc6c628dc1d475a110022b37bf661fa5 (diff)
Merge branch 'bschubert/cython' into 'master'
Introduce Cython for better performance

See merge request BuildStream/buildstream!1350
-rw-r--r--  .coveragerc                            3
-rw-r--r--  .gitignore                            10
-rw-r--r--  .pylintrc                              2
-rw-r--r--  CONTRIBUTING.rst                      41
-rw-r--r--  MANIFEST.in                            8
-rw-r--r--  pyproject.toml                         9
-rw-r--r--  requirements/cov-requirements.in       1
-rw-r--r--  requirements/cov-requirements.txt      1
-rwxr-xr-x  setup.py                             110
-rw-r--r--  src/buildstream/_variables.pyx (renamed from src/buildstream/_variables.py)  130
-rw-r--r--  src/buildstream/_yaml.pxd             44
-rw-r--r--  src/buildstream/_yaml.pyx (renamed from src/buildstream/_yaml.py)            686
-rw-r--r--  src/buildstream/element.py             3
-rw-r--r--  tests/internals/yaml.py               14
-rw-r--r--  tox.ini                               20

15 files changed, 725 insertions, 357 deletions
diff --git a/.coveragerc b/.coveragerc
index 457c439a6..5b06d817a 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,5 +1,6 @@
[run]
concurrency = multiprocessing
+plugins = Cython.Coverage
omit =
# Omit some internals
@@ -11,6 +12,8 @@ omit =
*/.eggs/*
# Omit .tox directory
*/.tox/*
+ # Omit a dynamically generated Cython file
+ */stringsource
[report]
show_missing = True
diff --git a/.gitignore b/.gitignore
index 3bc4e29af..a61a975ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,12 +2,22 @@
src/buildstream/**/*.pyc
tests/**/*.pyc
+# Generated C files
+src/buildstream/**/*.c
+src/buildstream/**/*.so
+
+# Cython report files when using annotations
+src/buildstream/**/*.html
+
# Build output directory
build
# Setuptools distribution folder.
/dist/
+# Pip build metadata
+pip-wheel-metadata/
+
# Python egg metadata, regenerated from source files by setuptools.
/src/*.egg-info
.eggs
diff --git a/.pylintrc b/.pylintrc
index c47ef92cf..e0941862f 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -3,7 +3,7 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code
-extension-pkg-whitelist=
+extension-pkg-whitelist=buildstream._variables,buildstream._yaml
# Add files or directories to the blacklist. They should be base names, not
# paths.
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 921cee909..b128c7f9b 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -1598,6 +1598,15 @@ can run ``tox`` with ``-r`` or ``--recreate`` option.
./setup.py test --addopts 'tests/frontend/buildtrack.py::test_build_track'
+ If you also want coverage on Cython files, you will need to add
+ ``BST_CYTHON_TRACE=1`` to your environment. You can then get coverage
+ by running::
+
+ BST_CYTHON_TRACE=1 coverage run ./setup.py test
+
+ Note that to be able to run ``./setup.py test``, you will need to have ``Cython``
+ installed.
+
.. tip::
We also have an environment called 'venv' which takes any arguments
@@ -1737,6 +1746,12 @@ You can install it with `pip install snakeviz`. Here is an example invocation:
It will then start a webserver and launch a browser to the relevant page.
+.. note::
+
+ If you also want to profile Cython files, you will need to add
+ ``BST_CYTHON_PROFILE=1`` to your environment and recompile the Cython
+ files. Running ``pip install`` again will take care of the recompilation.
+
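+ As a quick sketch (module and file names here are hypothetical, for
+ illustration only), once the extensions are rebuilt with profiling enabled,
+ ``cProfile`` sees Cython functions like ordinary Python ones::
+
+   import cProfile
+
+   from buildstream import _yaml
+
+   # Profile loading a YAML file through the compiled extension
+   cProfile.run("_yaml.load('project.conf')", sort="cumulative")
+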
Profiling specific parts of BuildStream with BST_PROFILE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
BuildStream can also turn on cProfile for specific parts of execution
@@ -1753,6 +1768,32 @@ call most of `initialized`, for each element. These profile files
are in the same cProfile format as those mentioned in the previous
section, and can be analysed in the same way.
+Fixing performance issues
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+BuildStream uses `Cython <https://cython.org/>`_ in order to speed up specific parts of the
+code base.
+
+.. note::
+
+ When optimizing for performance, please ensure that you optimize the algorithms before
+ jumping into Cython code. Cython will make the code harder to maintain and less accessible
+ to all developers.
+
+When adding a new Cython file to the codebase, you will need to register it in ``setup.py``.
+
+For example, for a module ``buildstream._my_module``, implemented in
+``src/buildstream/_my_module.pyx``, you would add the following to ``setup.py``::
+
+  register_cython_module("buildstream._my_module")
+
+The build tooling will then automatically pick up your module.
+
+.. note::
+
+ Please register Cython modules in the same place as the existing ones.
+
+When adding a definition file (``.pxd``) to share Cython symbols between modules,
+please keep the documentation in the adjacent implementation file and only expose
+the required definitions, as sketched below.
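+
+For example, a minimal definition file for a hypothetical module
+``buildstream._my_module`` (all names here are invented purely for
+illustration) could look like this::
+
+  # src/buildstream/_my_module.pxd
+  #
+  # Documentation for the class and function lives in the adjacent
+  # implementation file (_my_module.pyx); only the symbols that other
+  # modules need to cimport are declared here.
+
+  cdef class MyNode:
+
+      cdef public object value
+      cdef public int line
+
+  cpdef object my_node_get(MyNode node, str key)
+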
Managing data files
-------------------
diff --git a/MANIFEST.in b/MANIFEST.in
index d0de0e593..07369c481 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -6,6 +6,11 @@ include MAINTAINERS
include NEWS
include README.rst
+# Cython files
+recursive-include src/buildstream *.pyx
+recursive-include src/buildstream *.pxd
+recursive-include src/buildstream *.c
+
# Data files required by BuildStream's generic source tests
recursive-include src/buildstream/testing/_sourcetests/project *
@@ -43,3 +48,6 @@ include requirements/plugin-requirements.txt
# Versioneer
include versioneer.py
+
+# setuptools.build_meta doesn't include setup.py by default. Add it
+include setup.py
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..4dd02d1e5
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,9 @@
+[build-system]
+requires = [
+ # We need at least version 36.6.0, which introduced "build_meta"
+ "setuptools>=36.6.0",
+ # In order to build wheels, and as required by PEP 517
+ "wheel",
+ "Cython"
+]
+build-backend = "setuptools.build_meta"
diff --git a/requirements/cov-requirements.in b/requirements/cov-requirements.in
index 455b91ba6..fb582f816 100644
--- a/requirements/cov-requirements.in
+++ b/requirements/cov-requirements.in
@@ -1,2 +1,3 @@
coverage == 4.4.0
pytest-cov >= 2.5.0
+Cython
diff --git a/requirements/cov-requirements.txt b/requirements/cov-requirements.txt
index 843df85f3..a8ba7843b 100644
--- a/requirements/cov-requirements.txt
+++ b/requirements/cov-requirements.txt
@@ -1,4 +1,5 @@
coverage==4.4
+cython==0.29.7
pytest-cov==2.6.1
## The following requirements were added by pip freeze:
atomicwrites==1.3.0
diff --git a/setup.py b/setup.py
index 267ca6fb4..a57e65b56 100755
--- a/setup.py
+++ b/setup.py
@@ -17,15 +17,18 @@
#
# Authors:
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
+# Benjamin Schubert <bschubert15@bloomberg.net>
import os
+from pathlib import Path
import re
import shutil
import subprocess
import sys
-import versioneer
-from pathlib import Path
+# Add local directory to the path, in order to be able to import versioneer
+sys.path.append(os.path.dirname(__file__))
+import versioneer # noqa
##################################################################
@@ -39,7 +42,7 @@ if sys.version_info[0] != REQUIRED_PYTHON_MAJOR or sys.version_info[1] < REQUIRE
sys.exit(1)
try:
- from setuptools import setup, find_packages, Command
+ from setuptools import setup, find_packages, Command, Extension
from setuptools.command.easy_install import ScriptWriter
from setuptools.command.test import test as TestCommand
except ImportError:
@@ -303,6 +306,95 @@ with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
#####################################################
+# Setup Cython and extensions #
+#####################################################
+# We want to ensure that source distributions always
+# include the .c files, so that users do not need
+# Cython installed when building from them.
+def assert_cython_required():
+ if "sdist" not in sys.argv:
+ return
+
+ print("Cython is required when building 'sdist' in order to "
+ "ensure source distributions can be built without Cython. "
+ "Please install it using your package manager (usually 'python3-cython') "
+ "or pip (pip install cython).",
+ file=sys.stderr)
+
+ raise SystemExit(1)
+
+
+extension_macros = [
+ ("CYTHON_TRACE", os.environ.get("BST_CYTHON_TRACE", 0))
+]
+
+
+def cythonize(extensions, **kwargs):
+ try:
+ from Cython.Build import cythonize as _cythonize
+ except ImportError:
+ assert_cython_required()
+
+ print("Cython not found. Using preprocessed c files instead")
+
+ missing_c_sources = []
+
+ for extension in extensions:
+ for source in extension.sources:
+ if source.endswith(".pyx"):
+ c_file = source.replace(".pyx", ".c")
+
+ if not os.path.exists(c_file):
+ missing_c_sources.append((extension, c_file))
+
+ if missing_c_sources:
+ for extension, source in missing_c_sources:
+ print("Missing '{}' for building extension '{}'".format(source, extension.name))
+
+ raise SystemExit(1)
+ return extensions
+
+ return _cythonize(extensions, **kwargs)
+
+
+def register_cython_module(module_name, dependencies=None):
+ def files_from_module(modname):
+ basename = "src/{}".format(modname.replace(".", "/"))
+ return "{}.pyx".format(basename), "{}.pxd".format(basename)
+
+ if dependencies is None:
+ dependencies = []
+
+ implementation_file, definition_file = files_from_module(module_name)
+
+ assert os.path.exists(implementation_file)
+
+ depends = []
+ if os.path.exists(definition_file):
+ depends.append(definition_file)
+
+ for module in dependencies:
+ imp_file, def_file = files_from_module(module)
+ assert os.path.exists(imp_file), "Dependency file not found: {}".format(imp_file)
+ assert os.path.exists(def_file), "Dependency declaration file not found: {}".format(def_file)
+
+ depends.append(imp_file)
+ depends.append(def_file)
+
+ BUILD_EXTENSIONS.append(Extension(
+ name=module_name,
+ sources=[implementation_file],
+ depends=depends,
+ define_macros=extension_macros,
+ ))
+
+
+BUILD_EXTENSIONS = []
+
+register_cython_module("buildstream._yaml")
+register_cython_module("buildstream._variables", dependencies=["buildstream._yaml"])
+
+#####################################################
# Main setup() Invocation #
#####################################################
setup(name='BuildStream',
@@ -360,4 +452,16 @@ setup(name='BuildStream',
install_requires=install_requires,
entry_points=bst_install_entry_points,
tests_require=dev_requires,
+ ext_modules=cythonize(
+ BUILD_EXTENSIONS,
+ compiler_directives={
+ # Version of python to use
+ # https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#arguments
+ "language_level": "3",
+ # Enable line tracing, this is needed in order to generate coverage.
+ # This is not enabled unless the CYTHON_TRACE macro for distutils is defined.
+ "linetrace": True,
+ "profile": os.environ.get("BST_CYTHON_PROFILE", False),
+ }
+ ),
zip_safe=False)
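
As a quick sanity check that the extensions were actually compiled (a sketch,
assuming an editable or in-place build of this branch), the modules should
resolve to shared objects rather than .py files::

    import buildstream._yaml as _yaml
    import buildstream._variables as _variables

    # Compiled extension modules report a shared-object path
    print(_yaml.__file__)        # e.g. .../_yaml.cpython-37m-x86_64-linux-gnu.so
    print(_variables.__file__)
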
diff --git a/src/buildstream/_variables.py b/src/buildstream/_variables.pyx
index 74314cf1f..9b8b5a902 100644
--- a/src/buildstream/_variables.py
+++ b/src/buildstream/_variables.pyx
@@ -18,12 +18,13 @@
# Authors:
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
# Daniel Silverstone <daniel.silverstone@codethink.co.uk>
+# Benjamin Schubert <bschubert@bloomberg.net>
import re
import sys
from ._exceptions import LoadError, LoadErrorReason
-from . import _yaml
+from . cimport _yaml
# Variables are allowed to have dashes here
#
@@ -34,9 +35,8 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
# These hold data structures called "expansion strings" and are the parsed
# form of the strings which are the input to this subsystem. Strings
# such as "Hello %{name}, how are you?" are parsed into the form:
-# (3, ["Hello ", "name", ", how are you?"])
-# i.e. a tuple of an integer and a list, where the integer is the cached
-# length of the list, and the list consists of one or more strings.
+# ["Hello ", "name", ", how are you?"]
+# i.e. a list which consists of one or more strings.
# Strings in even indices of the list (0, 2, 4, etc) are constants which
# are copied into the output of the expansion algorithm. Strings in the
# odd indices (1, 3, 5, etc) are the names of further expansions to make.
@@ -58,15 +58,18 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
# variable settings for the element.
#
# Args:
-# node (dict): A node loaded and composited with yaml tools
+# node (Node): A node loaded and composited with yaml tools
#
# Raises:
# LoadError, if unresolved variables, or cycles in resolution, occur.
#
-class Variables():
+cdef class Variables:
- def __init__(self, node):
+ cdef _yaml.Node original
+ cdef dict _expstr_map
+ cdef public dict flat
+ def __init__(self, _yaml.Node node):
self.original = node
self._expstr_map = self._resolve(node)
self.flat = self._flatten()
@@ -84,7 +87,7 @@ class Variables():
# Raises:
# LoadError, if the string contains unresolved variable references.
#
- def subst(self, string):
+ def subst(self, str string):
expstr = _parse_expstr(string)
try:
@@ -93,7 +96,7 @@ class Variables():
unmatched = []
# Look for any unmatched variable names in the expansion string
- for var in expstr[1][1::2]:
+ for var in expstr[1::2]:
if var not in self._expstr_map:
unmatched.append(var)
@@ -112,17 +115,20 @@ class Variables():
#
# Here we resolve all of our inputs into a dictionary, ready for use
# in subst()
- def _resolve(self, node):
+ cdef dict _resolve(self, _yaml.Node node):
# Special case, if notparallel is specified in the variables for this
# element, then override max-jobs to be 1.
# Initialize it as a string as all variables are processed as strings.
#
- if _yaml.node_get(node, bool, 'notparallel', default_value=False):
+ if _yaml.node_get(node, bool, 'notparallel', None, False):
_yaml.node_set(node, 'max-jobs', str(1))
- ret = {}
- for key, value in _yaml.node_items(node):
- value = _yaml.node_get(node, str, key)
+ cdef dict ret = {}
+ cdef str key
+ cdef str value
+
+ for key in _yaml.node_keys(node):
+ value = <str> _yaml.node_get(node, str, key)
ret[sys.intern(key)] = _parse_expstr(value)
return ret
@@ -130,7 +136,7 @@ class Variables():
# First the check for anything unresolvable
summary = []
for key, expstr in self._expstr_map.items():
- for var in expstr[1][1::2]:
+ for var in expstr[1::2]:
if var not in self._expstr_map:
line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
provenance = _yaml.node_get_provenance(self.original, key)
@@ -142,7 +148,7 @@ class Variables():
def _check_for_cycles(self):
# And now the cycle checks
def cycle_check(expstr, visited, cleared):
- for var in expstr[1][1::2]:
+ for var in expstr[1::2]:
if var in cleared:
continue
if var in visited:
@@ -169,14 +175,18 @@ class Variables():
# LoadError, if the string contains unresolved variable references or
# if cycles are detected in the variable references
#
- def _flatten(self):
- flat = {}
+ cdef dict _flatten(self):
+ cdef dict flat = {}
+ cdef str key
+ cdef list expstr
+
try:
for key, expstr in self._expstr_map.items():
- if expstr[0] > 1:
- expstr = (1, [sys.intern(_expand_expstr(self._expstr_map, expstr))])
+ if len(expstr) > 1:
+ # FIXME: do we really gain anything by interning?
+ expstr = [sys.intern(_expand_expstr(self._expstr_map, expstr))]
self._expstr_map[key] = expstr
- flat[key] = expstr[1][0]
+ flat[key] = expstr[0]
except KeyError:
self._check_for_missing()
raise
@@ -190,19 +200,21 @@ class Variables():
# something which might "waste" memory, in reality each of these
# will live as long as the element which uses it, which is the
# vast majority of the memory usage across the execution of BuildStream.
-PARSE_CACHE = {
+cdef dict PARSE_CACHE = {
# Prime the cache with the empty string since otherwise that can
# cause issues with the parser, complications to which cause slowdown
- "": (1, [""]),
+ "": [""],
}
# Helper to parse a string into an expansion string tuple, caching
# the results so that future parse requests don't need to think about
# the string
-def _parse_expstr(instr):
+cdef list _parse_expstr(str instr):
+ cdef list ret
+
try:
- return PARSE_CACHE[instr]
+ return <list> PARSE_CACHE[instr]
except KeyError:
# This use of the regex turns a string like "foo %{bar} baz" into
# a list ["foo ", "bar", " baz"]
@@ -211,41 +223,61 @@ def _parse_expstr(instr):
# which we can optimise away, making the expansion routines not need
# a test for this.
if splits[-1] == '':
- splits = splits[:-1]
+ del splits[-1]
# Cache an interned copy of this. We intern it to try and reduce the
# memory impact of the cache. It seems odd to cache the list length
# but this is measurably cheaper than calculating it each time during
# string expansion.
- PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits])
- return PARSE_CACHE[instr]
+ ret = [sys.intern(<str> s) for s in <list> splits]
+ PARSE_CACHE[instr] = ret
+ return ret
+
+
+# Helper to expand recursively an expansion string in the context
+# of the given dictionary of expansion string
+#
+# Args:
+# content (dict): dictionary context for resolving the variables
+# value (list): expansion string to expand
+# acc (list): list in which to add the result
+# counter (int): counter to count the number of recursion in order to
+# detect cycles.
+#
+# Raises:
+# KeyError: if any expansion is missing
+# RecursionError: if a variable is defined recursively
+#
+cdef void _expand_expstr_helper(dict content, list value, list acc, int counter = 0) except *:
+ cdef Py_ssize_t idx = 0
+ cdef Py_ssize_t value_len = len(value)
+
+ if counter > 1000:
+ raise RecursionError()
+
+ while idx < value_len:
+ acc.append(value[idx])
+ idx += 1
+
+ if idx < value_len:
+ _expand_expstr_helper(content, <list> content[value[idx]], acc, counter + 1)
+
+ idx += 1
# Helper to expand a given top level expansion string tuple in the context
# of the given dictionary of expansion strings.
#
# Note: Will raise KeyError if any expansion is missing
-def _expand_expstr(content, topvalue):
+cdef str _expand_expstr(dict content, list topvalue):
# Short-circuit constant strings
- if topvalue[0] == 1:
- return topvalue[1][0]
+ if len(topvalue) == 1:
+ return <str> topvalue[0]
# Short-circuit strings which are entirely an expansion of another variable
# e.g. "%{another}"
- if topvalue[0] == 2 and topvalue[1][0] == "":
- return _expand_expstr(content, content[topvalue[1][1]])
-
- # Otherwise process fully...
- def internal_expand(value):
- (expansion_len, expansion_bits) = value
- idx = 0
- while idx < expansion_len:
- # First yield any constant string content
- yield expansion_bits[idx]
- idx += 1
- # Now, if there is an expansion variable left to expand, yield
- # the expansion of that variable too
- if idx < expansion_len:
- yield from internal_expand(content[expansion_bits[idx]])
- idx += 1
-
- return "".join(internal_expand(topvalue))
+ if len(topvalue) == 2 and len(<str> topvalue[0]) == 0:
+ return _expand_expstr(content, <list> content[topvalue[1]])
+
+ cdef list result = []
+ _expand_expstr_helper(content, topvalue, result)
+ return "".join(result)
diff --git a/src/buildstream/_yaml.pxd b/src/buildstream/_yaml.pxd
new file mode 100644
index 000000000..27a1a888e
--- /dev/null
+++ b/src/buildstream/_yaml.pxd
@@ -0,0 +1,44 @@
+#
+# Copyright (C) 2019 Bloomberg L.P.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Benjamin Schubert <bschubert@bloomberg.net>
+
+# Documentation for each class and method here can be found in the adjacent
+# implementation file (_yaml.pyx)
+
+cdef class Node:
+
+ cdef public object value
+ cdef public int file_index
+ cdef public int line
+ cdef public int column
+
+
+cdef class ProvenanceInformation:
+
+ cdef public Node node
+ cdef str displayname
+ cdef public str filename, shortname
+ cdef public int col, line
+ cdef public object project, toplevel
+ cdef public bint is_synthetic
+
+
+cpdef object node_get(Node node, object expected_type, str key, list indices=*, object default_value=*, bint allow_none=*)
+cpdef void node_set(Node node, object key, object value, list indices=*) except *
+cpdef list node_keys(object node)
+cpdef ProvenanceInformation node_get_provenance(Node node, str key=*, list indices=*)
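
With these declarations in place, other Cython modules can cimport the typed
API directly, as ``_variables.pyx`` does above. A minimal consumer might look
like this sketch (hypothetical module, for illustration only)::

    # src/buildstream/_my_consumer.pyx
    from . cimport _yaml

    cdef str get_name(_yaml.Node node):
        # Calls the cpdef function through its C-level interface,
        # avoiding Python attribute lookup on the module
        return <str> _yaml.node_get(node, str, 'name')
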
diff --git a/src/buildstream/_yaml.py b/src/buildstream/_yaml.pyx
index cdab4269e..50f0c64a9 100644
--- a/src/buildstream/_yaml.py
+++ b/src/buildstream/_yaml.pyx
@@ -19,14 +19,14 @@
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
# Daniel Silverstone <daniel.silverstone@codethink.co.uk>
# James Ennis <james.ennis@codethink.co.uk>
+# Benjamin Schubert <bschubert@bloomberg.net>
import sys
import string
from contextlib import ExitStack
-from collections import OrderedDict, namedtuple
+from collections import OrderedDict
from collections.abc import Mapping, Sequence
from copy import deepcopy
-from itertools import count
from ruamel import yaml
from ._exceptions import LoadError, LoadErrorReason
@@ -55,50 +55,66 @@ from ._exceptions import LoadError, LoadErrorReason
# line (int): The line number within the file where the value appears.
# col (int): The column number within the file where the value appears.
#
-# For efficiency, each field should be accessed by its integer index:
-# value = Node[0]
-# file_index = Node[1]
-# line = Node[2]
-# column = Node[3]
-#
-class Node(namedtuple('Node', ['value', 'file_index', 'line', 'column'])):
+cdef class Node:
+
+ def __init__(self, object value, int file_index, int line, int column):
+ self.value = value
+ self.file_index = file_index
+ self.line = line
+ self.column = column
+
def __contains__(self, what):
# Delegate to the inner value, though this will likely not work
# very well if the node is a list or string, it's unlikely that
# code which has access to such nodes would do this.
- return what in self[0]
+ return what in self.value
+
+
+# Metadata container for a yaml toplevel node.
+#
+# This class contains metadata around a yaml node in order to be able
+# to trace back the provenance of a node to the file.
+#
+cdef class FileInfo:
+
+ cdef str filename, shortname, displayname
+ cdef Node toplevel,
+ cdef object project
+
+ def __init__(self, str filename, str shortname, str displayname, Node toplevel, object project):
+ self.filename = filename
+ self.shortname = shortname
+ self.displayname = displayname
+ self.toplevel = toplevel
+ self.project = project
# File name handling
-_FILE_LIST = []
+cdef _FILE_LIST = []
-# Purely synthetic node will have None for the file number, have line number
+# Purely synthetic node will have _SYNTHETIC_FILE_INDEX for the file number, have line number
# zero, and a negative column number which comes from inverting the next value
# out of this counter. Synthetic nodes created with a reference node will
# have a file number from the reference node, some unknown line number, and
# a negative column number from this counter.
-_SYNTHETIC_COUNTER = count(start=-1, step=-1)
+cdef int _SYNTHETIC_FILE_INDEX = -1
+cdef int __counter = 0
+
+cdef int next_synthetic_counter():
+ global __counter
+ __counter -= 1
+ return __counter
# Returned from node_get_provenance
-class ProvenanceInformation:
-
- __slots__ = (
- "filename",
- "shortname",
- "displayname",
- "line",
- "col",
- "toplevel",
- "node",
- "project",
- "is_synthetic",
- )
-
- def __init__(self, nodeish):
+cdef class ProvenanceInformation:
+
+ def __init__(self, Node nodeish):
+ cdef FileInfo fileinfo
+
self.node = nodeish
- if (nodeish is None) or (nodeish[1] is None):
+ if (nodeish is None) or (nodeish.file_index is None):
self.filename = ""
self.shortname = ""
self.displayname = ""
@@ -107,15 +123,15 @@ class ProvenanceInformation:
self.toplevel = None
self.project = None
else:
- fileinfo = _FILE_LIST[nodeish[1]]
- self.filename = fileinfo[0]
- self.shortname = fileinfo[1]
- self.displayname = fileinfo[2]
+ fileinfo = <FileInfo> _FILE_LIST[nodeish.file_index]
+ self.filename = fileinfo.filename
+ self.shortname = fileinfo.shortname
+ self.displayname = fileinfo.displayname
# We add 1 here to convert from computerish to humanish
- self.line = nodeish[2] + 1
- self.col = nodeish[3]
- self.toplevel = fileinfo[3]
- self.project = fileinfo[4]
+ self.line = nodeish.line + 1
+ self.col = nodeish.column
+ self.toplevel = fileinfo.toplevel
+ self.project = fileinfo.project
self.is_synthetic = (self.filename == '') or (self.col < 0)
# Convert a Provenance to a string for error reporting
@@ -140,6 +156,19 @@ class YAMLLoadError(Exception):
pass
+# Represents the various states in which the Representer can be
+# while parsing yaml.
+cdef enum RepresenterState:
+ doc
+ init
+ stream
+ wait_key
+ wait_list_item
+ wait_value
+
+
+ctypedef RepresenterState (*representer_action)(Representer, object)
+
# Representer for YAML events comprising input to the BuildStream format.
#
# All streams MUST represent a single document which must be a Mapping.
@@ -148,13 +177,11 @@ class YAMLLoadError(Exception):
# Mappings must only have string keys, values are always represented as
# strings if they are scalar, or else as simple dictionaries and lists.
#
-class Representer:
- __slots__ = (
- "_file_index",
- "state",
- "output",
- "keys",
- )
+cdef class Representer:
+
+ cdef int _file_index
+ cdef RepresenterState state
+ cdef list output, keys
# Initialise a new representer
#
@@ -163,9 +190,9 @@ class Representer:
#
# Args:
# file_index (int): The index of this YAML file
- def __init__(self, file_index):
+ def __init__(self, int file_index):
self._file_index = file_index
- self.state = "init"
+ self.state = RepresenterState.init
self.output = []
self.keys = []
@@ -176,12 +203,14 @@ class Representer:
#
# Raises:
# YAMLLoadError: Something went wrong.
- def handle_event(self, event):
+ cdef void handle_event(self, event) except *:
if getattr(event, "anchor", None) is not None:
raise YAMLLoadError("Anchors are disallowed in BuildStream at line {} column {}"
.format(event.start_mark.line, event.start_mark.column))
- if event.__class__.__name__ == "ScalarEvent":
+ cdef str event_name = event.__class__.__name__
+
+ if event_name == "ScalarEvent":
if event.tag is not None:
if not event.tag.startswith("tag:yaml.org,2002:"):
raise YAMLLoadError(
@@ -189,104 +218,139 @@ class Representer:
"This is disallowed in BuildStream. At line {} column {}"
.format(event.start_mark.line, event.start_mark.column))
- handler = "_handle_{}_{}".format(self.state, event.__class__.__name__)
- handler = getattr(self, handler, None)
- if handler is None:
+ cdef representer_action handler = self._get_handler_for_event(event_name)
+ if not handler:
raise YAMLLoadError(
"Invalid input detected. No handler for {} in state {} at line {} column {}"
.format(event, self.state, event.start_mark.line, event.start_mark.column))
- self.state = handler(event) # pylint: disable=not-callable
+ # Cython weirdness here, we need to pass self to the function
+ self.state = <RepresenterState> handler(self, event) # pylint: disable=not-callable
# Get the output of the YAML parse
#
# Returns:
# (Node or None): Return the Node instance of the top level mapping or
# None if there wasn't one.
- def get_output(self):
- try:
+ cdef Node get_output(self):
+ if len(self.output):
return self.output[0]
- except IndexError:
- return None
-
- def _handle_init_StreamStartEvent(self, ev):
- return "stream"
-
- def _handle_stream_DocumentStartEvent(self, ev):
- return "doc"
+ return None
- def _handle_doc_MappingStartEvent(self, ev):
+ cdef representer_action _get_handler_for_event(self, str event_name):
+ if self.state == RepresenterState.wait_list_item:
+ if event_name == "ScalarEvent":
+ return self._handle_wait_list_item_ScalarEvent
+ elif event_name == "MappingStartEvent":
+ return self._handle_wait_list_item_MappingStartEvent
+ elif event_name == "SequenceStartEvent":
+ return self._handle_wait_list_item_SequenceStartEvent
+ elif event_name == "SequenceEndEvent":
+ return self._handle_wait_list_item_SequenceEndEvent
+ elif self.state == RepresenterState.wait_value:
+ if event_name == "ScalarEvent":
+ return self._handle_wait_value_ScalarEvent
+ elif event_name == "MappingStartEvent":
+ return self._handle_wait_value_MappingStartEvent
+ elif event_name == "SequenceStartEvent":
+ return self._handle_wait_value_SequenceStartEvent
+ elif self.state == RepresenterState.wait_key:
+ if event_name == "ScalarEvent":
+ return self._handle_wait_key_ScalarEvent
+ elif event_name == "MappingEndEvent":
+ return self._handle_wait_key_MappingEndEvent
+ elif self.state == RepresenterState.stream:
+ if event_name == "DocumentStartEvent":
+ return self._handle_stream_DocumentStartEvent
+ elif event_name == "StreamEndEvent":
+ return self._handle_stream_StreamEndEvent
+ elif self.state == RepresenterState.doc:
+ if event_name == "MappingStartEvent":
+ return self._handle_doc_MappingStartEvent
+ elif event_name == "DocumentEndEvent":
+ return self._handle_doc_DocumentEndEvent
+ elif self.state == RepresenterState.init and event_name == "StreamStartEvent":
+ return self._handle_init_StreamStartEvent
+ return NULL
+
+ cdef RepresenterState _handle_init_StreamStartEvent(self, object ev):
+ return RepresenterState.stream
+
+ cdef RepresenterState _handle_stream_DocumentStartEvent(self, object ev):
+ return RepresenterState.doc
+
+ cdef RepresenterState _handle_doc_MappingStartEvent(self, object ev):
newmap = Node({}, self._file_index, ev.start_mark.line, ev.start_mark.column)
self.output.append(newmap)
- return "wait_key"
+ return RepresenterState.wait_key
- def _handle_wait_key_ScalarEvent(self, ev):
+ cdef RepresenterState _handle_wait_key_ScalarEvent(self, object ev):
self.keys.append(ev.value)
- return "wait_value"
+ return RepresenterState.wait_value
- def _handle_wait_value_ScalarEvent(self, ev):
+ cdef RepresenterState _handle_wait_value_ScalarEvent(self, object ev):
key = self.keys.pop()
- self.output[-1][0][key] = \
+ (<dict> (<Node> self.output[-1]).value)[key] = \
Node(ev.value, self._file_index, ev.start_mark.line, ev.start_mark.column)
- return "wait_key"
+ return RepresenterState.wait_key
- def _handle_wait_value_MappingStartEvent(self, ev):
- new_state = self._handle_doc_MappingStartEvent(ev)
+ cdef RepresenterState _handle_wait_value_MappingStartEvent(self, object ev):
+ cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev)
key = self.keys.pop()
- self.output[-2][0][key] = self.output[-1]
+ (<dict> (<Node> self.output[-2]).value)[key] = self.output[-1]
return new_state
- def _handle_wait_key_MappingEndEvent(self, ev):
+ cdef RepresenterState _handle_wait_key_MappingEndEvent(self, object ev):
# We've finished a mapping, so pop it off the output stack
# unless it's the last one in which case we leave it
if len(self.output) > 1:
self.output.pop()
- if type(self.output[-1][0]) is list:
- return "wait_list_item"
+ if type((<Node> self.output[-1]).value) is list:
+ return RepresenterState.wait_list_item
else:
- return "wait_key"
+ return RepresenterState.wait_key
else:
- return "doc"
+ return RepresenterState.doc
- def _handle_wait_value_SequenceStartEvent(self, ev):
+ cdef RepresenterState _handle_wait_value_SequenceStartEvent(self, object ev):
self.output.append(Node([], self._file_index, ev.start_mark.line, ev.start_mark.column))
- self.output[-2][0][self.keys[-1]] = self.output[-1]
- return "wait_list_item"
+ (<dict> (<Node> self.output[-2]).value)[self.keys[-1]] = self.output[-1]
+ return RepresenterState.wait_list_item
- def _handle_wait_list_item_SequenceStartEvent(self, ev):
- self.keys.append(len(self.output[-1][0]))
+ cdef RepresenterState _handle_wait_list_item_SequenceStartEvent(self, object ev):
+ self.keys.append(len((<Node> self.output[-1]).value))
self.output.append(Node([], self._file_index, ev.start_mark.line, ev.start_mark.column))
- self.output[-2][0].append(self.output[-1])
- return "wait_list_item"
+ (<list> (<Node> self.output[-2]).value).append(self.output[-1])
+ return RepresenterState.wait_list_item
- def _handle_wait_list_item_SequenceEndEvent(self, ev):
+ cdef RepresenterState _handle_wait_list_item_SequenceEndEvent(self, object ev):
# When ending a sequence, we need to pop a key because we retain the
# key until the end so that if we need to mutate the underlying entry
# we can.
key = self.keys.pop()
self.output.pop()
if type(key) is int:
- return "wait_list_item"
+ return RepresenterState.wait_list_item
else:
- return "wait_key"
+ return RepresenterState.wait_key
- def _handle_wait_list_item_ScalarEvent(self, ev):
- self.output[-1][0].append(
+ cdef RepresenterState _handle_wait_list_item_ScalarEvent(self, object ev):
+ (<Node> self.output[-1]).value.append(
Node(ev.value, self._file_index, ev.start_mark.line, ev.start_mark.column))
- return "wait_list_item"
+ return RepresenterState.wait_list_item
- def _handle_wait_list_item_MappingStartEvent(self, ev):
- new_state = self._handle_doc_MappingStartEvent(ev)
- self.output[-2][0].append(self.output[-1])
+ cdef RepresenterState _handle_wait_list_item_MappingStartEvent(self, object ev):
+ cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev)
+ (<list> (<Node> self.output[-2]).value).append(self.output[-1])
return new_state
- def _handle_doc_DocumentEndEvent(self, ev):
+ cdef RepresenterState _handle_doc_DocumentEndEvent(self, object ev):
if len(self.output) != 1:
raise YAMLLoadError("Zero, or more than one document found in YAML stream")
- return "stream"
+ return RepresenterState.stream
- def _handle_stream_StreamEndEvent(self, ev):
- return "init"
+ cdef RepresenterState _handle_stream_StreamEndEvent(self, object ev):
+ return RepresenterState.init
# Loads a dictionary from some YAML
@@ -302,17 +366,20 @@ class Representer:
#
# Raises: LoadError
#
-def load(filename, shortname=None, copy_tree=False, *, project=None):
+cpdef Node load(str filename, str shortname=None, bint copy_tree=False, object project=None):
if not shortname:
shortname = filename
+ cdef str displayname
if (project is not None) and (project.junction is not None):
displayname = "{}:{}".format(project.junction.name, shortname)
else:
displayname = shortname
- file_number = len(_FILE_LIST)
- _FILE_LIST.append((filename, shortname, displayname, None, project))
+ cdef Py_ssize_t file_number = len(_FILE_LIST)
+ _FILE_LIST.append(FileInfo(filename, shortname, displayname, None, project))
+
+ cdef Node data
try:
with open(filename) as f:
@@ -337,12 +404,20 @@ def load(filename, shortname=None, copy_tree=False, *, project=None):
# Like load(), but doesnt require the data to be in a file
#
-def load_data(data, file_index=None, file_name=None, copy_tree=False):
+cpdef Node load_data(str data, int file_index=_SYNTHETIC_FILE_INDEX, str file_name=None, bint copy_tree=False):
+ cdef Representer rep
+ cdef FileInfo f_info
try:
rep = Representer(file_index)
- for event in yaml.parse(data, Loader=yaml.CBaseLoader):
- rep.handle_event(event)
+ parser = yaml.CParser(data)
+
+ try:
+ while parser.check_event():
+ rep.handle_event(parser.get_event())
+ finally:
+ parser.dispose()
+
contents = rep.get_output()
except YAMLLoadError as e:
raise LoadError(LoadErrorReason.INVALID_YAML,
@@ -351,7 +426,7 @@ def load_data(data, file_index=None, file_name=None, copy_tree=False):
raise LoadError(LoadErrorReason.INVALID_YAML,
"Severely malformed YAML:\n\n{}\n\n".format(e)) from e
- if not isinstance(contents, tuple) or not isinstance(contents[0], dict):
+ if type(contents) != Node:
# Special case allowance for None, when the loaded file has only comments in it.
if contents is None:
contents = Node({}, file_index, 0, 0)
@@ -362,12 +437,14 @@ def load_data(data, file_index=None, file_name=None, copy_tree=False):
# Store this away because we'll use it later for "top level" provenance
if file_index is not None:
- _FILE_LIST[file_index] = (
- _FILE_LIST[file_index][0], # Filename
- _FILE_LIST[file_index][1], # Shortname
- _FILE_LIST[file_index][2], # Displayname
+ f_info = <FileInfo> _FILE_LIST[file_index]
+
+ _FILE_LIST[file_index] = FileInfo(
+ f_info.filename,
+ f_info.shortname,
+ f_info.displayname,
contents,
- _FILE_LIST[file_index][4], # Project
+ f_info.project,
)
if copy_tree:
@@ -386,7 +463,7 @@ def load_data(data, file_index=None, file_name=None, copy_tree=False):
# Args:
# contents (any): Content to write out
# filename (str): The (optional) file name to write out to
-def dump(contents, filename=None):
+def dump(object contents, str filename=None):
roundtrip_dump(node_sanitize(contents), file=filename)
@@ -395,25 +472,25 @@ def dump(contents, filename=None):
# Gets the provenance for a node
#
# Args:
-# node (dict): a dictionary
+# node (Node): a dictionary
# key (str): key in the dictionary
# indices (list of indexes): Index path, in the case of list values
#
# Returns: The Provenance of the dict, member or list element
#
-def node_get_provenance(node, key=None, indices=None):
- assert is_node(node)
+cpdef ProvenanceInformation node_get_provenance(Node node, str key=None, list indices=None):
+ assert type(node.value) is dict
if key is None:
# Retrieving the provenance for this node directly
return ProvenanceInformation(node)
if key and not indices:
- return ProvenanceInformation(node[0].get(key))
+ return ProvenanceInformation(node.value.get(key))
- nodeish = node[0].get(key)
+ cdef Node nodeish = <Node> node.value.get(key)
for idx in indices:
- nodeish = nodeish[0][idx]
+ nodeish = <Node> nodeish.value[idx]
return ProvenanceInformation(nodeish)
@@ -446,57 +523,54 @@ _sentinel = object()
# Note:
# Returned strings are stripped of leading and trailing whitespace
#
-def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, allow_none=False):
- assert type(node) is Node
-
+cpdef object node_get(Node node, object expected_type, str key, list indices=None, object default_value=_sentinel, bint allow_none=False):
if indices is None:
if default_value is _sentinel:
- value = node[0].get(key, Node(default_value, None, 0, 0))
+ value = node.value.get(key, Node(default_value, _SYNTHETIC_FILE_INDEX, 0, 0))
else:
- value = node[0].get(key, Node(default_value, None, 0, next(_SYNTHETIC_COUNTER)))
+ value = node.value.get(key, Node(default_value, _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter()))
- if value[0] is _sentinel:
+ if value.value is _sentinel:
provenance = node_get_provenance(node)
raise LoadError(LoadErrorReason.INVALID_DATA,
"{}: Dictionary did not contain expected key '{}'".format(provenance, key))
else:
# Implied type check of the element itself
# No need to synthesise useful node content as we destructure it immediately
- value = Node(node_get(node, list, key), None, 0, 0)
+ value = Node(node_get(node, list, key), _SYNTHETIC_FILE_INDEX, 0, 0)
for index in indices:
- value = value[0][index]
+ value = value.value[index]
if type(value) is not Node:
- value = (value,)
+ value = Node(value, _SYNTHETIC_FILE_INDEX, 0, 0)
# Optionally allow None as a valid value for any type
- if value[0] is None and (allow_none or default_value is None):
+ if value.value is None and (allow_none or default_value is None):
return None
- if (expected_type is not None) and (not isinstance(value[0], expected_type)):
+ if (expected_type is not None) and (not isinstance(value.value, expected_type)):
# Attempt basic conversions if possible, typically we want to
# be able to specify numeric values and convert them to strings,
# but we dont want to try converting dicts/lists
try:
- if (expected_type == bool and isinstance(value[0], str)):
+ if (expected_type == bool and isinstance(value.value, str)):
# Dont coerce booleans to string, this makes "False" strings evaluate to True
# We don't structure into full nodes since there's no need.
- if value[0] in ('True', 'true'):
- value = (True, None, 0, 0)
- elif value[0] in ('False', 'false'):
- value = (False, None, 0, 0)
+ if value.value in ('True', 'true'):
+ value = Node(True, _SYNTHETIC_FILE_INDEX, 0, 0)
+ elif value.value in ('False', 'false'):
+ value = Node(False, _SYNTHETIC_FILE_INDEX, 0, 0)
else:
raise ValueError()
elif not (expected_type == list or
expected_type == dict or
- isinstance(value[0], (list, dict))):
- value = (expected_type(value[0]), None, 0, 0)
+ isinstance(value.value, (list, dict))):
+ value = Node(expected_type(value.value), _SYNTHETIC_FILE_INDEX, 0, 0)
else:
raise ValueError()
except (ValueError, TypeError):
provenance = node_get_provenance(node, key=key, indices=indices)
if indices:
- path = [key]
- path.extend("[{:d}]".format(i) for i in indices)
+ path = [key, *["[{:d}]".format(i) for i in indices]]
path = "".join(path)
else:
path = key
@@ -505,8 +579,8 @@ def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel,
.format(provenance, path, expected_type.__name__))
# Now collapse lists, and scalars, to their value, leaving nodes as-is
- if type(value[0]) is not dict:
- value = value[0]
+ if type(value.value) is not dict:
+ value = value.value
# Trim it at the bud, let all loaded strings from yaml be stripped of whitespace
if type(value) is str:
@@ -520,17 +594,17 @@ def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel,
return value
-def __trim_list_provenance(value):
- ret = []
+cdef list __trim_list_provenance(list value):
+ cdef list ret = []
for entry in value:
if type(entry) is not Node:
- entry = (entry, None, 0, 0)
- if type(entry[0]) is list:
- ret.append(__trim_list_provenance(entry[0]))
- elif type(entry[0]) is dict:
+ entry = Node(entry, _SYNTHETIC_FILE_INDEX, 0, 0)
+ if type(entry.value) is list:
+ ret.append(__trim_list_provenance(entry.value))
+ elif type(entry.value) is dict:
ret.append(entry)
else:
- ret.append(entry[0])
+ ret.append(entry.value)
return ret
@@ -541,30 +615,32 @@ def __trim_list_provenance(value):
# create entries before using `node_set`
#
# Args:
-# node (tuple): The node
+# node (Node): The node
# key (str): The key name
# value: The value
# indices: Any indices to index into the list referenced by key, like in
# `node_get` (must be a list of integers)
#
-def node_set(node, key, value, indices=None):
+cpdef void node_set(Node node, object key, object value, list indices=None) except *:
+ cdef int idx
+
if indices:
- node = node[0][key]
+ node = <Node> (<dict> node.value)[key]
key = indices.pop()
for idx in indices:
- node = node[0][idx]
+ node = <Node> (<list> node.value)[idx]
if type(value) is Node:
- node[0][key] = value
+ node.value[key] = value
else:
try:
# Need to do this just in case we're modifying a list
- old_value = node[0][key]
+ old_value = <Node> node.value[key]
except KeyError:
old_value = None
if old_value is None:
- node[0][key] = Node(value, node[1], node[2], next(_SYNTHETIC_COUNTER))
+ node.value[key] = Node(value, node.file_index, node.line, next_synthetic_counter())
else:
- node[0][key] = Node(value, old_value[1], old_value[2], old_value[3])
+ node.value[key] = Node(value, old_value.file_index, old_value.line, old_value.column)
# node_extend_list()
@@ -582,23 +658,21 @@ def node_set(node, key, value, indices=None):
# key (str): The list name in the node
# length (int): The length to extend the list to
# default (any): The default value to extend with.
-def node_extend_list(node, key, length, default):
+def node_extend_list(Node node, str key, Py_ssize_t length, object default):
assert type(default) is str or default in ([], {})
- list_node = node[0].get(key)
+ cdef Node list_node = <Node> node.value.get(key)
if list_node is None:
- list_node = node[0][key] = Node([], node[1], node[2], next(_SYNTHETIC_COUNTER))
+ list_node = node.value[key] = Node([], node.file_index, node.line, next_synthetic_counter())
- assert type(list_node[0]) is list
-
- the_list = list_node[0]
+ cdef list the_list = list_node.value
def_type = type(default)
- file_index = node[1]
+ file_index = node.file_index
if the_list:
line_num = the_list[-1][2]
else:
- line_num = list_node[2]
+ line_num = list_node.line
while length > len(the_list):
if def_type is str:
@@ -610,7 +684,7 @@ def node_extend_list(node, key, length, default):
line_num += 1
- the_list.append(Node(value, file_index, line_num, next(_SYNTHETIC_COUNTER)))
+ the_list.append(Node(value, file_index, line_num, next_synthetic_counter()))
# node_items()
@@ -627,16 +701,19 @@ def node_extend_list(node, key, length, default):
#
def node_items(node):
if type(node) is not Node:
- node = Node(node, None, 0, 0)
- for key, value in node[0].items():
+ node = Node(node, _SYNTHETIC_FILE_INDEX, 0, 0)
+
+ cdef str key
+
+ for key, value in node.value.items():
if type(value) is not Node:
- value = Node(value, None, 0, 0)
- if type(value[0]) is dict:
+ value = Node(value, _SYNTHETIC_FILE_INDEX, 0, 0)
+ if type(value.value) is dict:
yield (key, value)
- elif type(value[0]) is list:
- yield (key, __trim_list_provenance(value[0]))
+ elif type(value.value) is list:
+ yield (key, __trim_list_provenance(value.value))
else:
- yield (key, value[0])
+ yield (key, value.value)
# node_keys()
@@ -650,10 +727,10 @@ def node_items(node):
# Yields:
# (str): The key name
#
-def node_keys(node):
- if type(node) is not Node:
- node = Node(node, None, 0, 0)
- yield from node[0].keys()
+cpdef list node_keys(object node):
+ if type(node) is Node:
+ return list((<Node> node).value.keys())
+ return list(node.keys())
# node_del()
@@ -666,9 +743,9 @@ def node_keys(node):
# key (str): The key we want to remove
# safe (bool): Whether to raise a KeyError if unable
#
-def node_del(node, key, safe=False):
+def node_del(Node node, str key, bint safe=False):
try:
- del node[0][key]
+ del node.value[key]
except KeyError:
if not safe:
raise
@@ -689,7 +766,7 @@ def node_del(node, key, safe=False):
def is_node(maybenode):
# It's a programming error to give this a Node which isn't a mapping
# so assert that.
- assert (type(maybenode) is not Node) or (type(maybenode[0]) is dict)
+ assert (type(maybenode) is not Node) or (type(maybenode.value) is dict)
# Now return the type check
return type(maybenode) is Node
@@ -708,10 +785,11 @@ def is_node(maybenode):
# (Node): An empty YAML mapping node, whose provenance is to this new
# synthetic file
#
-def new_synthetic_file(filename, project=None):
- file_index = len(_FILE_LIST)
- node = Node({}, file_index, 0, 0)
- _FILE_LIST.append((filename,
+def new_synthetic_file(str filename, object project=None):
+ cdef Py_ssize_t file_index = len(_FILE_LIST)
+ cdef Node node = Node({}, file_index, 0, 0)
+
+ _FILE_LIST.append(FileInfo(filename,
filename,
"<synthetic {}>".format(filename),
node,
@@ -727,11 +805,11 @@ def new_synthetic_file(filename, project=None):
# Returns
# (Node): A new empty YAML mapping node
#
-def new_empty_node(ref_node=None):
+def new_empty_node(Node ref_node=None):
if ref_node is not None:
- return Node({}, ref_node[1], ref_node[2], next(_SYNTHETIC_COUNTER))
+ return Node({}, ref_node.file_index, ref_node.line, next_synthetic_counter())
else:
- return Node({}, None, 0, 0)
+ return Node({}, _SYNTHETIC_FILE_INDEX, 0, 0)
# new_node_from_dict()
@@ -742,8 +820,9 @@ def new_empty_node(ref_node=None):
# Returns:
# (Node): A new synthetic YAML tree which represents this dictionary
#
-def new_node_from_dict(indict):
- ret = {}
+cpdef Node new_node_from_dict(dict indict):
+ cdef dict ret = {}
+ cdef str k
for k, v in indict.items():
vtype = type(v)
if vtype is dict:
@@ -751,13 +830,13 @@ def new_node_from_dict(indict):
elif vtype is list:
ret[k] = __new_node_from_list(v)
else:
- ret[k] = Node(str(v), None, 0, next(_SYNTHETIC_COUNTER))
- return Node(ret, None, 0, next(_SYNTHETIC_COUNTER))
+ ret[k] = Node(str(v), _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter())
+ return Node(ret, _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter())
# Internal function to help new_node_from_dict() to handle lists
-def __new_node_from_list(inlist):
- ret = []
+cdef Node __new_node_from_list(list inlist):
+ cdef list ret = []
for v in inlist:
vtype = type(v)
if vtype is dict:
@@ -765,8 +844,8 @@ def __new_node_from_list(inlist):
elif vtype is list:
ret.append(__new_node_from_list(v))
else:
- ret.append(Node(str(v), None, 0, next(_SYNTHETIC_COUNTER)))
- return Node(ret, None, 0, next(_SYNTHETIC_COUNTER))
+ ret.append(Node(str(v), _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter()))
+ return Node(ret, _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter())
# _is_composite_list
@@ -785,13 +864,13 @@ def __new_node_from_list(inlist):
# (LoadError): If node was a mapping and contained a mix of
# list composition directives and other keys
#
-def _is_composite_list(node):
+cdef bint _is_composite_list(Node node):
+ cdef bint has_directives = False
+ cdef bint has_keys = False
+ cdef str key
- if type(node[0]) is dict:
- has_directives = False
- has_keys = False
-
- for key, _ in node_items(node):
+ if type(node.value) is dict:
+ for key in node_keys(node):
if key in ['(>)', '(<)', '(=)']: # pylint: disable=simplifiable-if-statement
has_directives = True
else:
@@ -816,36 +895,36 @@ def _is_composite_list(node):
# target (Node): A composite list
# source (Node): A composite list
#
-def _compose_composite_list(target, source):
- clobber = source[0].get("(=)")
- prefix = source[0].get("(<)")
- suffix = source[0].get("(>)")
+cdef void _compose_composite_list(Node target, Node source):
+ clobber = source.value.get("(=)")
+ prefix = source.value.get("(<)")
+ suffix = source.value.get("(>)")
if clobber is not None:
# We want to clobber the target list
# which basically means replacing the target list
# with ourselves
- target[0]["(=)"] = clobber
+ target.value["(=)"] = clobber
if prefix is not None:
- target[0]["(<)"] = prefix
- elif "(<)" in target[0]:
- target[0]["(<)"][0].clear()
+ target.value["(<)"] = prefix
+ elif "(<)" in target.value:
+ target.value["(<)"].value.clear()
if suffix is not None:
- target[0]["(>)"] = suffix
- elif "(>)" in target[0]:
- target[0]["(>)"][0].clear()
+ target.value["(>)"] = suffix
+ elif "(>)" in target.value:
+ target.value["(>)"].value.clear()
else:
# Not clobbering, so prefix the prefix and suffix the suffix
if prefix is not None:
- if "(<)" in target[0]:
- for v in reversed(prefix[0]):
- target[0]["(<)"][0].insert(0, v)
+ if "(<)" in target.value:
+ for v in reversed(prefix.value):
+ target.value["(<)"].value.insert(0, v)
else:
- target[0]["(<)"] = prefix
+ target.value["(<)"] = prefix
if suffix is not None:
- if "(>)" in target[0]:
- target[0]["(>)"][0].extend(suffix[0])
+ if "(>)" in target.value:
+ target.value["(>)"].value.extend(suffix.value)
else:
- target[0]["(>)"] = suffix
+ target.value["(>)"] = suffix
# _compose_list()
@@ -857,18 +936,18 @@ def _compose_composite_list(target, source):
# target (Node): The target list to be composed into
# source (Node): The composition list to be composed from
#
-def _compose_list(target, source):
- clobber = source[0].get("(=)")
- prefix = source[0].get("(<)")
- suffix = source[0].get("(>)")
+cdef void _compose_list(Node target, Node source):
+ clobber = source.value.get("(=)")
+ prefix = source.value.get("(<)")
+ suffix = source.value.get("(>)")
if clobber is not None:
- target[0].clear()
- target[0].extend(clobber[0])
+ target.value.clear()
+ target.value.extend(clobber.value)
if prefix is not None:
- for v in reversed(prefix[0]):
- target[0].insert(0, v)
+ for v in reversed(prefix.value):
+ target.value.insert(0, v)
if suffix is not None:
- target[0].extend(suffix[0])
+ target.value.extend(suffix.value)
# composite_dict()
@@ -882,16 +961,19 @@ def _compose_list(target, source):
#
# Raises: CompositeError
#
-def composite_dict(target, source, path=None):
+cpdef void composite_dict(Node target, Node source, list path=None) except *:
+ cdef str k
+ cdef Node v, target_value
+
if path is None:
path = []
- for k, v in source[0].items():
+ for k, v in source.value.items():
path.append(k)
- if type(v[0]) is list:
+ if type(v.value) is list:
# List clobbers anything list-like
- target_value = target[0].get(k)
+ target_value = target.value.get(k)
if not (target_value is None or
- type(target_value[0]) is list or
+ type(target_value.value) is list or
_is_composite_list(target_value)):
raise CompositeError(path,
"{}: List cannot overwrite {} at: {}"
@@ -899,51 +981,51 @@ def composite_dict(target, source, path=None):
k,
node_get_provenance(target, k)))
# Looks good, clobber it
- target[0][k] = v
+ target.value[k] = v
elif _is_composite_list(v):
- if k not in target[0]:
+ if k not in target.value:
# Composite list clobbers empty space
- target[0][k] = v
- elif type(target[0][k][0]) is list:
+ target.value[k] = v
+ elif type(target.value[k].value) is list:
# Composite list composes into a list
- _compose_list(target[0][k], v)
- elif _is_composite_list(target[0][k]):
+ _compose_list(target.value[k], v)
+ elif _is_composite_list(target.value[k]):
# Composite list merges into composite list
- _compose_composite_list(target[0][k], v)
+ _compose_composite_list(target.value[k], v)
else:
# Else composing on top of normal dict or a scalar, so raise...
raise CompositeError(path,
"{}: Cannot compose lists onto {}".format(
node_get_provenance(v),
- node_get_provenance(target[0][k])))
- elif type(v[0]) is dict:
+ node_get_provenance(target.value[k])))
+ elif type(v.value) is dict:
# We're composing a dict into target now
- if k not in target[0]:
+ if k not in target.value:
# Target lacks a dict at that point, make a fresh one with
# the same provenance as the incoming dict
- target[0][k] = Node({}, v[1], v[2], v[3])
- if type(target[0]) is not dict:
+ target.value[k] = Node({}, v.file_index, v.line, v.column)
+ if type(target.value) is not dict:
raise CompositeError(path,
"{}: Cannot compose dictionary onto {}".format(
node_get_provenance(v),
- node_get_provenance(target[0][k])))
- composite_dict(target[0][k], v, path)
+ node_get_provenance(target.value[k])))
+ composite_dict(target.value[k], v, path)
else:
- target_value = target[0].get(k)
- if target_value is not None and type(target_value[0]) is not str:
+ target_value = target.value.get(k)
+ if target_value is not None and type(target_value.value) is not str:
raise CompositeError(path,
"{}: Cannot compose scalar on non-scalar at {}".format(
node_get_provenance(v),
- node_get_provenance(target[0][k])))
- target[0][k] = v
+ node_get_provenance(target.value[k])))
+ target.value[k] = v
path.pop()
# Like composite_dict(), but raises an all purpose LoadError for convenience
#
-def composite(target, source):
- assert type(source[0]) is dict
- assert type(target[0]) is dict
+cpdef void composite(Node target, Node source) except *:
+ assert type(source.value) is dict
+ assert type(target.value) is dict
try:
composite_dict(target, source)
@@ -961,14 +1043,16 @@ def composite(target, source):
# Like composite(target, source), but where target overrides source instead.
#
-def composite_and_move(target, source):
+def composite_and_move(Node target, Node source):
composite(source, target)
- to_delete = [key for key in target[0].keys() if key not in source[0]]
- for key, value in source[0].items():
- target[0][key] = value
+ cdef str key
+ cdef Node value
+ cdef list to_delete = [key for key in target.value.keys() if key not in source.value]
+ for key, value in source.value.items():
+ target.value[key] = value
for key in to_delete:
- del target[0][key]
+ del target.value[key]
# Types we can short-circuit in node_sanitize for speed.
@@ -982,12 +1066,12 @@ __SANITIZE_SHORT_CIRCUIT_TYPES = (int, float, str, bool)
#
# Only dicts are ordered, list elements are left in order.
#
-def node_sanitize(node, *, dict_type=OrderedDict):
+cpdef object node_sanitize(object node, object dict_type=OrderedDict):
node_type = type(node)
# If we have an unwrappable node, unwrap it
if node_type is Node:
- node = node[0]
+ node = node.value
node_type = type(node)
# Short-circuit None which occurs ca. twice per element
@@ -1015,7 +1099,7 @@ def node_sanitize(node, *, dict_type=OrderedDict):
# Sometimes we're handed tuples and we can't be sure what they contain
# so we have to sanitize into them
elif node_type is tuple:
- return tuple((node_sanitize(v, dict_type=dict_type) for v in node))
+ return tuple([node_sanitize(v, dict_type=dict_type) for v in node])
# Everything else just gets returned as-is.
return node
@@ -1028,18 +1112,18 @@ def node_sanitize(node, *, dict_type=OrderedDict):
# means a typo which would otherwise not trigger an error).
#
# Args:
-# node (dict): A dictionary loaded from YAML
+# node (Node): A dictionary loaded from YAML
# valid_keys (list): A list of valid keys for the specified node
#
# Raises:
# LoadError: In the case that the specified node contained
# one or more invalid keys
#
-def node_validate(node, valid_keys):
+def node_validate(Node node, list valid_keys):
# Probably the fastest way to do this: https://stackoverflow.com/a/23062482
- valid_keys = set(valid_keys)
- invalid = next((key for key in node[0] if key not in valid_keys), None)
+ cdef set valid_keys_set = set(valid_keys)
+ invalid = next((key for key in node.value if key not in valid_keys_set), None)
if invalid:
provenance = node_get_provenance(node, key=invalid)
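.. note::

   A sketch with an illustrative key list; ``node_validate()`` raises
   ``LoadError`` when the node carries a key outside the valid set::

       from buildstream import _yaml
       from buildstream._exceptions import LoadError

       node = _yaml.load('element.bst')  # hypothetical file
       try:
           _yaml.node_validate(node, ['kind', 'description', 'depends'])
       except LoadError as e:
           print('Unexpected key:', e)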
@@ -1075,10 +1159,13 @@ __NODE_ASSERT_COMPOSITION_DIRECTIVES = ('(>)', '(<)', '(=)')
# Returns:
# (Node): A deep copy of source with provenance preserved.
#
-def node_copy(source):
- copy = {}
- for key, value in source[0].items():
- value_type = type(value[0])
+cpdef Node node_copy(Node source):
+ cdef dict copy = {}
+ cdef str key
+ cdef Node value
+
+ for key, value in source.value.items():
+ value_type = type(value.value)
if value_type is dict:
copy[key] = node_copy(value)
elif value_type is list:
@@ -1088,14 +1175,18 @@ def node_copy(source):
else:
raise ValueError("Unable to be quick about node_copy of {}".format(value_type))
- return Node(copy, source[1], source[2], source[3])
+ return Node(copy, source.file_index, source.line, source.column)
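.. note::

   A sketch of ``node_copy()``, file name hypothetical: the copy is an
   independent tree whose members keep their original provenance::

       from buildstream import _yaml

       original = _yaml.load('element.bst')  # hypothetical file
       copied = _yaml.node_copy(original)

       assert copied.value is not original.value        # fresh containers
       assert copied.file_index == original.file_index  # same provenance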
# Internal function to help node_copy() but for lists.
-def _list_copy(source):
- copy = []
- for item in source[0]:
- item_type = type(item[0])
+cdef Node _list_copy(Node source):
+ cdef list copy = []
+ for item in source.value:
+ if type(item) is Node:
+ item_type = type(item.value)
+ else:
+ item_type = type(item)
+
if item_type is dict:
copy.append(node_copy(item))
elif item_type is list:
@@ -1105,7 +1196,7 @@ def _list_copy(source):
else:
raise ValueError("Unable to be quick about list_copy of {}".format(item_type))
- return Node(copy, source[1], source[2], source[3])
+ return Node(copy, source.file_index, source.line, source.column)
# node_final_assertions()
@@ -1119,10 +1210,11 @@ def _list_copy(source):
# Raises:
# (LoadError): If any assertions fail
#
-def node_final_assertions(node):
- assert type(node) is Node
+cpdef void node_final_assertions(Node node) except *:
+ cdef str key
+ cdef Node value
- for key, value in node[0].items():
+ for key, value in node.value.items():
# Assert that list composition directives don't remain; this
# indicates that the user intended to override a list which
@@ -1133,7 +1225,7 @@ def node_final_assertions(node):
raise LoadError(LoadErrorReason.TRAILING_LIST_DIRECTIVE,
"{}: Attempt to override non-existing list".format(provenance))
- value_type = type(value[0])
+ value_type = type(value.value)
if value_type is dict:
node_final_assertions(value)
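.. note::

   A sketch of the kind of YAML (shown inline as a string, purely for
   illustration) that trips this assertion: ``(>)`` asks to append to a
   list, and if no such list exists upstream the directive survives to
   the final node::

       bad_yaml = """
       config:
         install-commands (>):
         - make install
       """
       # With no 'install-commands' list to compose onto,
       # node_final_assertions() raises TRAILING_LIST_DIRECTIVE.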
@@ -1142,9 +1234,9 @@ def node_final_assertions(node):
# Helper function for node_final_assertions(), but for lists.
-def _list_final_assertions(values):
- for value in values[0]:
- value_type = type(value[0])
+def _list_final_assertions(Node values):
+ for value in values.value:
+ value_type = type(value.value)
if value_type is dict:
node_final_assertions(value)
@@ -1170,12 +1262,12 @@ def _list_final_assertions(values):
# Note that dashes are generally preferred for variable names and
# usage in YAML, but things such as option names which will be
# evaluated with jinja2 cannot use dashes.
-def assert_symbol_name(provenance, symbol_name, purpose, *, allow_dashes=True):
- valid_chars = string.digits + string.ascii_letters + '_'
+def assert_symbol_name(ProvenanceInformation provenance, str symbol_name, str purpose, *, bint allow_dashes=True):
+ cdef str valid_chars = string.digits + string.ascii_letters + '_'
if allow_dashes:
valid_chars += '-'
- valid = True
+ cdef bint valid = True
if not symbol_name:
valid = False
elif any(x not in valid_chars for x in symbol_name):
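.. note::

   A standalone sketch of the character rule enforced here (the real
   function additionally reports a detailed ``LoadError`` with
   provenance)::

       import string

       def symbol_chars_ok(name, allow_dashes=True):
           valid_chars = string.digits + string.ascii_letters + '_'
           if allow_dashes:
               valid_chars += '-'
           return bool(name) and all(c in valid_chars for c in name)

       assert symbol_chars_ok('target-arch')
       assert not symbol_chars_ok('target arch')  # spaces are rejected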
@@ -1214,13 +1306,11 @@ def assert_symbol_name(provenance, symbol_name, purpose, *, allow_dashes=True):
#
# Returns:
# (list): A path from `node` to `target` or None if `target` is not in the subtree
-def node_find_target(node, target, *, key=None):
- assert type(node) is Node
- assert type(target) is Node
+cpdef list node_find_target(Node node, Node target, str key=None):
if key is not None:
- target = target[0][key]
+ target = target.value[key]
- path = []
+ cdef list path = []
if _walk_find_target(node, path, target):
if key:
# Remove key from end of path
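.. note::

   A sketch of ``node_find_target()``, using the ``basics.yaml`` fixture
   from the tests below: the returned path is the sequence of keys and
   indices leading from ``node`` down to ``target``::

       from buildstream import _yaml

       toplevel = _yaml.load('basics.yaml')  # test fixture path, illustrative
       want = toplevel.value['children']     # a Node nested in `toplevel`
       assert _yaml.node_find_target(toplevel, want) == ['children']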
@@ -1230,19 +1320,22 @@ def node_find_target(node, target, *, key=None):
# Helper for node_find_target() which walks a value
-def _walk_find_target(node, path, target):
- if node[1:] == target[1:]:
+cdef bint _walk_find_target(Node node, list path, Node target):
+ if node.file_index == target.file_index and node.line == target.line and node.column == target.column:
return True
- elif type(node[0]) is dict:
+ elif type(node.value) is dict:
return _walk_dict_node(node, path, target)
- elif type(node[0]) is list:
+ elif type(node.value) is list:
return _walk_list_node(node, path, target)
return False
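.. note::

   The field-by-field comparison above is equivalent to the old tuple
   slice ``node[1:] == target[1:]``; spelled out as a helper::

       def same_provenance(a, b):
           return (a.file_index, a.line, a.column) == \
                  (b.file_index, b.line, b.column)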
# Helper for node_find_target() which walks a list
-def _walk_list_node(node, path, target):
- for i, v in enumerate(node[0]):
+cdef bint _walk_list_node(Node node, list path, Node target):
+ cdef int i
+ cdef Node v
+
+ for i, v in enumerate(node.value):
path.append(i)
if _walk_find_target(v, path, target):
return True
@@ -1251,8 +1344,11 @@ def _walk_list_node(node, path, target):
# Helper for node_find_target() which walks a mapping
-def _walk_dict_node(node, path, target):
- for k, v in node[0].items():
+cdef bint _walk_dict_node(Node node, list path, Node target):
+ cdef str k
+ cdef Node v
+
+ for k, v in node.value.items():
path.append(k)
if _walk_find_target(v, path, target):
return True
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 08326c6f3..8e006ea6b 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -931,7 +931,8 @@ class Element(Plugin):
(str): The resolved value for *varname*, or None if no
variable was declared with the given name.
"""
- return self.__variables.flat.get(varname)
+ # Flat is not recognized correctly by Pylint as being a dictionary
+ return self.__variables.flat.get(varname) # pylint: disable=no-member
def batch_prepare_assemble(self, flags, *, collect=None):
""" Configure command batching across prepare() and assemble()
diff --git a/tests/internals/yaml.py b/tests/internals/yaml.py
index dae20b326..6b26fdfaf 100644
--- a/tests/internals/yaml.py
+++ b/tests/internals/yaml.py
@@ -22,7 +22,7 @@ def test_load_yaml(datafiles):
'basics.yaml')
loaded = _yaml.load(filename)
- assert loaded[0].get('kind')[0] == 'pony'
+ assert loaded.value.get('kind').value == 'pony'
def assert_provenance(filename, line, col, node, key=None, indices=None):
@@ -43,7 +43,7 @@ def test_basic_provenance(datafiles):
'basics.yaml')
loaded = _yaml.load(filename)
- assert loaded[0].get('kind')[0] == 'pony'
+ assert loaded.value.get('kind').value == 'pony'
assert_provenance(filename, 1, 0, loaded)
@@ -56,7 +56,7 @@ def test_member_provenance(datafiles):
'basics.yaml')
loaded = _yaml.load(filename)
- assert loaded[0].get('kind')[0] == 'pony'
+ assert loaded.value.get('kind').value == 'pony'
assert_provenance(filename, 2, 13, loaded, 'description')
@@ -68,7 +68,7 @@ def test_element_provenance(datafiles):
'basics.yaml')
loaded = _yaml.load(filename)
- assert loaded[0].get('kind')[0] == 'pony'
+ assert loaded.value.get('kind').value == 'pony'
assert_provenance(filename, 5, 2, loaded, 'moods', [1])
@@ -102,7 +102,7 @@ def test_node_get(datafiles):
'basics.yaml')
base = _yaml.load(filename)
- assert base[0].get('kind')[0] == 'pony'
+ assert base.value.get('kind').value == 'pony'
children = _yaml.node_get(base, list, 'children')
assert isinstance(children, list)
@@ -522,9 +522,9 @@ def test_node_find_target(datafiles, case):
# laid out. Client code should never do this.
def _walk(node, entry, rest):
if rest:
- return _walk(node[0][entry], rest[0], rest[1:])
+ return _walk(node.value[entry], rest[0], rest[1:])
else:
- return node[0][entry]
+ return node.value[entry]
want = _walk(loaded, case[0], case[1:])
found_path = _yaml.node_find_target(toplevel, want)
diff --git a/tox.ini b/tox.ini
index a7a4874c7..94e96d9b0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,6 +4,7 @@
[tox]
envlist = py{35,36,37}
skip_missing_interpreters = true
+isolated_build = true
#
# Defaults for all environments
@@ -11,6 +12,10 @@ skip_missing_interpreters = true
# Anything specified here is inherited by the sections
#
[testenv]
+usedevelop =
+    # This is required in order to get coverage for Cython files.
+ py{35,36,37}-!nocover: True
+
commands =
# Running with coverage reporting enabled
py{35,36,37}-!external-!nocover: pytest --basetemp {envtmpdir} --cov=buildstream --cov-config .coveragerc {posargs}
@@ -51,6 +56,8 @@ setenv =
py{35,36,37}: XDG_CACHE_HOME = {envtmpdir}/cache
py{35,36,37}: XDG_CONFIG_HOME = {envtmpdir}/config
py{35,36,37}: XDG_DATA_HOME = {envtmpdir}/share
+    # This is required to get coverage for Cython files
+ py{35,36,37}-!nocover: BST_CYTHON_TRACE = 1
whitelist_externals =
py{35,36,37}:
@@ -61,7 +68,9 @@ whitelist_externals =
# Coverage reporting
#
[testenv:coverage]
-skip_install = true
+# This is required in order to get coverage for Cython files.
+usedevelop = True
+
commands =
coverage combine --rcfile={toxinidir}/.coveragerc {toxinidir}/.coverage-reports/
coverage html --rcfile={toxinidir}/.coveragerc --directory={toxinidir}/.coverage-reports/
@@ -75,6 +84,10 @@ setenv =
# Running linters
#
[testenv:lint]
+commands_pre =
+ # Build C extensions to allow Pylint to analyse them
+ {envpython} setup.py build_ext --inplace
+
commands =
pycodestyle
pylint src/buildstream tests
@@ -147,3 +160,8 @@ deps =
-rrequirements/requirements.txt
-rrequirements/dev-requirements.txt
-rrequirements/plugin-requirements.txt
+
+# When building using PEP 517/518, we don't want the default dependencies
+# installed by the base environment.
+[testenv:.package]
+deps =
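.. note::

   A sketch, not the actual ``setup.py`` from this merge request, of how
   an environment variable like ``BST_CYTHON_TRACE`` can be wired into
   Cython line tracing so that coverage works on ``.pyx`` files::

       import os

       from setuptools import Extension
       from Cython.Build import cythonize

       trace = os.environ.get('BST_CYTHON_TRACE', '0') == '1'

       extensions = cythonize(
           [Extension('buildstream._yaml',
                      sources=['src/buildstream/_yaml.pyx'],
                      define_macros=[('CYTHON_TRACE', '1')] if trace else [])],
           compiler_directives={'linetrace': trace},
       )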