| author    | Daniel Silverstone <daniel.silverstone@codethink.co.uk>     | 2019-03-25 15:08:06 +0000 |
|-----------|--------------------------------------------------------------|---------------------------|
| committer | Daniel Silverstone <daniel.silverstone@codethink.co.uk>     | 2019-03-27 20:46:43 +0000 |
| commit    | 1e698622caee2da202a2511bbf41f476224d4cb8 (patch)             |                           |
| tree      | 9d627c5645863d3bcedc002663a7ac95e9d7f614                     |                           |
| parent    | 49c22f209b32247bc1335f8632518a971c056d9e (diff)              |                           |
| download  | buildstream-1e698622caee2da202a2511bbf41f476224d4cb8.tar.gz |                           |
YAML Cache: Remove the YAML Cache
The new YAML world order doesn't need the YAML cache, so remove it
before we begin the change.
Signed-off-by: Daniel Silverstone <daniel.silverstone@codethink.co.uk>
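
For background, the module being removed wrapped YAML parsing in a content-keyed cache that was opened as a context manager and pickled back to disk when the block exited. The sketch below is a heavily condensed, hypothetical rendering of that pattern — the `ParseCache` and `open_cache` names are invented for illustration and are not BuildStream API; the real implementation is the `_yamlcache.py` listing in the diff below.

```python
import hashlib
import os
import pickle
from contextlib import contextmanager


class ParseCache:
    """Minimal stand-in for the removed YamlCache: parsed results keyed by a
    sha1 of the raw file contents plus any load options."""

    def __init__(self):
        self._entries = {}

    @staticmethod
    def calculate_key(*args):
        # The removed cache pickled its key inputs and hashed them with sha1
        return hashlib.sha1(pickle.dumps(args)).hexdigest()

    def get(self, filepath, key):
        entry = self._entries.get(filepath)
        if entry and entry[0] == key:
            return entry[1]
        return None

    def put(self, filepath, key, value):
        self._entries[filepath] = (key, value)


@contextmanager
def open_cache(cachefile):
    # Load a previously pickled cache if one exists, hand it to the caller,
    # then write it back to disk when the 'with' block exits.
    cache = None
    if os.path.exists(cachefile):
        try:
            with open(cachefile, "rb") as f:
                cache = pickle.load(f)
        except (EOFError, pickle.UnpicklingError):
            cache = None
    if cache is None:
        cache = ParseCache()
    yield cache
    os.makedirs(os.path.dirname(cachefile) or ".", exist_ok=True)
    with open(cachefile, "wb") as f:
        pickle.dump(cache, f)
```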
| mode       | path                          | lines changed |
|------------|-------------------------------|---------------|
| -rw-r--r-- | buildstream/_loader/loader.py | 30            |
| -rw-r--r-- | buildstream/_yaml.py          | 10            |
| -rw-r--r-- | buildstream/_yamlcache.py     | 371           |
| -rw-r--r-- | tests/frontend/yamlcache.py   | 146           |
| -rw-r--r-- | tests/internals/yaml.py       | 36            |
5 files changed, 21 insertions, 572 deletions
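
The substance of the change is easiest to see in `_yaml.load()`: the optional cache lookup wrapped around `load_data()` goes away, so every load parses the file. Below is a condensed, hypothetical before/after sketch of that control flow — the `load_data()` stub and the `load_cached`/`load_plain` names are illustrative rather than BuildStream API, while the `get()`/`put_from_key()` calls mirror the `_yaml.py` hunk further down.

```python
from ruamel import yaml


def load_data(contents, shortname, *, copy_tree=False):
    # Stand-in for buildstream._yaml.load_data(): the real function returns a
    # provenance-annotated node; plain parsing is enough to show the control flow.
    return yaml.safe_load(contents)


# Before this commit: consult the cache, parse only on a miss, then store.
def load_cached(filename, contents, copy_tree, project, yaml_cache=None):
    data = None
    key = None
    if yaml_cache:
        data, key = yaml_cache.get(project, filename, contents, copy_tree)
    if not data:
        data = load_data(contents, filename, copy_tree=copy_tree)
        if yaml_cache:
            yaml_cache.put_from_key(project, filename, key, data)
    return data


# After this commit: every load is a real parse.
def load_plain(filename, contents, copy_tree, project):
    return load_data(contents, filename, copy_tree=copy_tree)
```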
diff --git a/buildstream/_loader/loader.py b/buildstream/_loader/loader.py index 2a67a6e2a..aaa6f0019 100644 --- a/buildstream/_loader/loader.py +++ b/buildstream/_loader/loader.py @@ -27,7 +27,6 @@ from .. import _yaml from ..element import Element from .._profile import Topics, profile_start, profile_end from .._includes import Includes -from .._yamlcache import YamlCache from .types import Symbol from .loadelement import LoadElement, _extract_depends_from_node @@ -108,19 +107,13 @@ class Loader(): # target_elements = [] - # XXX This will need to be changed to the context's top-level project if this method - # is ever used for subprojects - top_dir = self.project.directory - - cache_file = YamlCache.get_cache_file(top_dir) - with YamlCache.open(self._context, cache_file) as yaml_cache: - for target in targets: - profile_start(Topics.LOAD_PROJECT, target) - _junction, name, loader = self._parse_name(target, rewritable, ticker, - fetch_subprojects=fetch_subprojects) - element = loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache) - target_elements.append(element) - profile_end(Topics.LOAD_PROJECT, target) + for target in targets: + profile_start(Topics.LOAD_PROJECT, target) + _junction, name, loader = self._parse_name(target, rewritable, ticker, + fetch_subprojects=fetch_subprojects) + element = loader._load_file(name, rewritable, ticker, fetch_subprojects) + target_elements.append(element) + profile_end(Topics.LOAD_PROJECT, target) # # Now that we've resolve the dependencies, scan them for circular dependencies @@ -186,13 +179,12 @@ class Loader(): # rewritable (bool): Whether we should load in round trippable mode # ticker (callable): A callback to report loaded filenames to the frontend # fetch_subprojects (bool): Whether to fetch subprojects while loading - # yaml_cache (YamlCache): A yaml cache # provenance (Provenance): The location from where the file was referred to, or None # # Returns: # (LoadElement): A loaded LoadElement # - def _load_file(self, filename, rewritable, ticker, fetch_subprojects, yaml_cache=None, provenance=None): + def _load_file(self, filename, rewritable, ticker, fetch_subprojects, provenance=None): # Silently ignore already loaded files if filename in self._elements: @@ -206,7 +198,7 @@ class Loader(): fullpath = os.path.join(self._basedir, filename) try: node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable, - project=self.project, yaml_cache=yaml_cache) + project=self.project) except LoadError as e: if e.reason == LoadErrorReason.MISSING_FILE: @@ -264,14 +256,14 @@ class Loader(): # Load all dependency files for the new LoadElement for dep in dependencies: if dep.junction: - self._load_file(dep.junction, rewritable, ticker, fetch_subprojects, yaml_cache, dep.provenance) + self._load_file(dep.junction, rewritable, ticker, fetch_subprojects, dep.provenance) loader = self._get_loader(dep.junction, rewritable=rewritable, ticker=ticker, fetch_subprojects=fetch_subprojects, provenance=dep.provenance) else: loader = self dep_element = loader._load_file(dep.name, rewritable, ticker, - fetch_subprojects, yaml_cache, dep.provenance) + fetch_subprojects, dep.provenance) if _yaml.node_get(dep_element.node, str, Symbol.KIND) == 'junction': raise LoadError(LoadErrorReason.INVALID_DATA, diff --git a/buildstream/_yaml.py b/buildstream/_yaml.py index 26da5e3be..4c2ae2b4d 100644 --- a/buildstream/_yaml.py +++ b/buildstream/_yaml.py @@ -186,13 +186,12 @@ class CompositeTypeError(CompositeError): # shortname (str): The filename in shorthand 
for error reporting (or None) # copy_tree (bool): Whether to make a copy, preserving the original toplevels # for later serialization -# yaml_cache (YamlCache): A yaml cache to consult rather than parsing # # Returns (dict): A loaded copy of the YAML file with provenance information # # Raises: LoadError # -def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None): +def load(filename, shortname=None, copy_tree=False, *, project=None): if not shortname: shortname = filename @@ -202,13 +201,8 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache= data = None with open(filename) as f: contents = f.read() - if yaml_cache: - data, key = yaml_cache.get(project, filename, contents, copy_tree) - if not data: - data = load_data(contents, file, copy_tree=copy_tree) - if yaml_cache: - yaml_cache.put_from_key(project, filename, key, data) + data = load_data(contents, file, copy_tree=copy_tree) return data except FileNotFoundError as e: diff --git a/buildstream/_yamlcache.py b/buildstream/_yamlcache.py deleted file mode 100644 index 89117007b..000000000 --- a/buildstream/_yamlcache.py +++ /dev/null @@ -1,371 +0,0 @@ -# -# Copyright 2018 Bloomberg Finance LP -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library. If not, see <http://www.gnu.org/licenses/>. -# -# Authors: -# Jonathan Maw <jonathan.maw@codethink.co.uk> - -import os -import pickle -import hashlib -import io - -import sys - -from contextlib import contextmanager -from collections import namedtuple - -from ._context import Context -from . import _yaml - - -YAML_CACHE_FILENAME = "yaml_cache.pickle" - - -# YamlCache() -# -# A cache that wraps around the loading of yaml in projects. -# -# The recommended way to use a YamlCache is: -# with YamlCache.open(context) as yamlcache: -# # Load all the yaml -# ... -# -# Args: -# context (Context): The invocation Context -# -class YamlCache(): - - def __init__(self, context): - self._project_caches = {} - self._context = context - - ################## - # Public Methods # - ################## - - # is_cached(): - # - # Checks whether a file is cached. - # - # Args: - # project (Project): The project this file is in. - # filepath (str): The path to the file, *relative to the project's directory*. - # - # Returns: - # (bool): Whether the file is cached. - def is_cached(self, project, filepath): - cache_path = self._get_filepath(project, filepath) - project_name = self.get_project_name(project) - try: - project_cache = self._project_caches[project_name] - if cache_path in project_cache.elements: - return True - except KeyError: - pass - return False - - # open(): - # - # Return an instance of the YamlCache which writes to disk when it leaves scope. - # - # Args: - # context (Context): The context. - # cachefile (str): The path to the cache file. - # - # Returns: - # (YamlCache): A YamlCache. 
- @staticmethod - @contextmanager - def open(context, cachefile): - # Try to load from disk first - cache = None - if os.path.exists(cachefile): - try: - with open(cachefile, "rb") as f: - cache = BstUnpickler(f, context).load() - except EOFError: - # The file was empty - pass - except pickle.UnpicklingError as e: - sys.stderr.write("Failed to load YamlCache, {}\n".format(e)) - - # Failed to load from disk, create a new one - if not cache: - cache = YamlCache(context) - - yield cache - - cache._write(cachefile) - - # get_cache_file(): - # - # Retrieves a path to the yaml cache file. - # - # Returns: - # (str): The path to the cache file - @staticmethod - def get_cache_file(top_dir): - return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME) - - # get(): - # - # Gets a parsed file from the cache. - # - # Args: - # project (Project) or None: The project this file is in, if it exists. - # filepath (str): The absolute path to the file. - # contents (str): The contents of the file to be cached - # copy_tree (bool): Whether the data should make a copy when it's being generated - # (i.e. exactly as when called in yaml) - # - # Returns: - # (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache. - # (str): The key used to look up the parsed yaml in the cache - def get(self, project, filepath, contents, copy_tree): - key = self._calculate_key(contents, copy_tree) - data = self._get(project, filepath, key) - return data, key - - # put(): - # - # Puts a parsed file into the cache. - # - # Args: - # project (Project): The project this file is in. - # filepath (str): The path to the file. - # contents (str): The contents of the file that has been cached - # copy_tree (bool): Whether the data should make a copy when it's being generated - # (i.e. exactly as when called in yaml) - # value (decorated dict): The data to put into the cache. - def put(self, project, filepath, contents, copy_tree, value): - key = self._calculate_key(contents, copy_tree) - self.put_from_key(project, filepath, key, value) - - # put_from_key(): - # - # Put a parsed file into the cache when given a key. - # - # Args: - # project (Project): The project this file is in. - # filepath (str): The path to the file. - # key (str): The key to the file within the cache. Typically, this is the - # value of `calculate_key()` with the file's unparsed contents - # and any relevant metadata passed in. - # value (decorated dict): The data to put into the cache. - def put_from_key(self, project, filepath, key, value): - cache_path = self._get_filepath(project, filepath) - project_name = self.get_project_name(project) - try: - project_cache = self._project_caches[project_name] - except KeyError: - project_cache = self._project_caches[project_name] = CachedProject({}) - - project_cache.elements[cache_path] = CachedYaml(key, value) - - ################### - # Private Methods # - ################### - - # Writes the yaml cache to the specified path. - # - # Args: - # path (str): The path to the cache file. 
- def _write(self, path): - parent_dir = os.path.dirname(path) - os.makedirs(parent_dir, exist_ok=True) - with open(path, "wb") as f: - BstPickler(f).dump(self) - - # _get_filepath(): - # - # Returns a file path relative to a project if passed, or the original path if - # the project is None - # - # Args: - # project (Project) or None: The project the filepath exists within - # full_path (str): The path that the returned path is based on - # - # Returns: - # (str): The path to the file, relative to a project if it exists - def _get_filepath(self, project, full_path): - if project: - assert full_path.startswith(project.directory) - filepath = os.path.relpath(full_path, project.directory) - else: - filepath = full_path - return filepath - - # _calculate_key(): - # - # Calculates a key for putting into the cache. - # - # Args: - # (basic object)... : Any number of strictly-ordered basic objects - # - # Returns: - # (str): A key made out of every arg passed in - @staticmethod - def _calculate_key(*args): - string = pickle.dumps(args) - return hashlib.sha1(string).hexdigest() - - # _get(): - # - # Gets a parsed file from the cache when given a key. - # - # Args: - # project (Project): The project this file is in. - # filepath (str): The path to the file. - # key (str): The key to the file within the cache. Typically, this is the - # value of `calculate_key()` with the file's unparsed contents - # and any relevant metadata passed in. - # - # Returns: - # (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache. - def _get(self, project, filepath, key): - cache_path = self._get_filepath(project, filepath) - project_name = self.get_project_name(project) - try: - project_cache = self._project_caches[project_name] - try: - cachedyaml = project_cache.elements[cache_path] - if cachedyaml._key == key: - # We've unpickled the YamlCache, but not the specific file - if cachedyaml._contents is None: - cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context) - return cachedyaml._contents - except KeyError: - pass - except KeyError: - pass - return None - - # get_project_name(): - # - # Gets a name appropriate for Project. Projects must use their junction's - # name if present, otherwise elements with the same contents under the - # same path with identically-named projects are considered the same yaml - # object, despite existing in different Projects. - # - # Args: - # project (Project): The project this file is in, or None. - # - # Returns: - # (str): The project's junction's name if present, the project's name, - # or an empty string if there is no project - @staticmethod - def get_project_name(project): - if project: - if project.junction: - project_name = project.junction.name - else: - project_name = project.name - else: - project_name = "" - return project_name - - -CachedProject = namedtuple('CachedProject', ['elements']) - - -class CachedYaml(): - def __init__(self, key, contents): - self._key = key - self.set_contents(contents) - - # Sets the contents of the CachedYaml. - # - # Args: - # contents (provenanced dict): The contents to put in the cache. - # - def set_contents(self, contents): - self._contents = contents - self._pickled_contents = BstPickler.dumps(contents) - - # Pickling helper method, prevents 'contents' from being serialised - def __getstate__(self): - data = self.__dict__.copy() - data['_contents'] = None - return data - - -# In _yaml.load, we have a ProvenanceFile that stores the project the file -# came from. 
Projects can't be pickled, but it's always going to be the same -# project between invocations (unless the entire project is moved but the -# file stayed in the same place) -class BstPickler(pickle.Pickler): - def persistent_id(self, obj): - if isinstance(obj, _yaml.ProvenanceFile): - if obj.project: - # ProvenanceFile's project object cannot be stored as it is. - project_tag = YamlCache.get_project_name(obj.project) - # ProvenanceFile's filename must be stored relative to the - # project, as the project dir may move. - name = os.path.relpath(obj.name, obj.project.directory) - else: - project_tag = None - name = obj.name - return ("ProvenanceFile", name, obj.shortname, project_tag) - elif isinstance(obj, Context): - return ("Context",) - else: - return None - - @staticmethod - def dumps(obj): - stream = io.BytesIO() - BstPickler(stream).dump(obj) - stream.seek(0) - return stream.read() - - -class BstUnpickler(pickle.Unpickler): - def __init__(self, file, context): - super().__init__(file) - self._context = context - - def persistent_load(self, pid): - if pid[0] == "ProvenanceFile": - _, tagged_name, shortname, project_tag = pid - - if project_tag is not None: - for p in self._context.get_projects(): - if YamlCache.get_project_name(p) == project_tag: - project = p - break - - name = os.path.join(project.directory, tagged_name) - - if not project: - projects = [YamlCache.get_project_name(p) for p in self._context.get_projects()] - raise pickle.UnpicklingError("No project with name {} found in {}" - .format(project_tag, projects)) - else: - project = None - name = tagged_name - - return _yaml.ProvenanceFile(name, shortname, project) - elif pid[0] == "Context": - return self._context - else: - raise pickle.UnpicklingError("Unsupported persistent object, {}".format(pid)) - - @staticmethod - def loads(text, context): - stream = io.BytesIO() - stream.write(bytes(text)) - stream.seek(0) - return BstUnpickler(stream, context).load() diff --git a/tests/frontend/yamlcache.py b/tests/frontend/yamlcache.py deleted file mode 100644 index 34b689d8c..000000000 --- a/tests/frontend/yamlcache.py +++ /dev/null @@ -1,146 +0,0 @@ -# Pylint doesn't play well with fixtures and dependency injection from pytest -# pylint: disable=redefined-outer-name - -from contextlib import contextmanager -import os -import tempfile - -import pytest -from ruamel import yaml - -from tests.testutils import generate_junction, create_element_size -from buildstream.plugintestutils import cli # pylint: disable=unused-import -from buildstream import _yaml -from buildstream._yamlcache import YamlCache -from buildstream._project import Project -from buildstream._context import Context - - -def generate_project(tmpdir, ref_storage, with_junction, name="test"): - if with_junction == 'junction': - subproject_dir = generate_project( - tmpdir, ref_storage, - 'no-junction', name='test-subproject' - ) - - project_dir = os.path.join(tmpdir, name) - os.makedirs(project_dir) - # project.conf - project_conf_path = os.path.join(project_dir, 'project.conf') - elements_path = 'elements' - project_conf = { - 'name': name, - 'element-path': elements_path, - 'ref-storage': ref_storage, - } - _yaml.dump(project_conf, project_conf_path) - - # elements - if with_junction == 'junction': - junction_name = 'junction.bst' - junction_dir = os.path.join(project_dir, elements_path) - junction_path = os.path.join(project_dir, elements_path, junction_name) - os.makedirs(junction_dir) - generate_junction(tmpdir, subproject_dir, junction_path) - element_depends = 
[{'junction': junction_name, 'filename': 'test.bst'}] - else: - element_depends = [] - - element_name = 'test.bst' - create_element_size(element_name, project_dir, elements_path, element_depends, 1) - - return project_dir - - -@contextmanager -def with_yamlcache(project_dir): - context = Context() - project = Project(project_dir, context) - cache_file = YamlCache.get_cache_file(project_dir) - with YamlCache.open(context, cache_file) as yamlcache: - yield yamlcache, project - - -def yamlcache_key(yamlcache, in_file, copy_tree=False): - with open(in_file) as f: - key = yamlcache._calculate_key(f.read(), copy_tree) - return key - - -def modified_file(input_file, tmpdir): - with open(input_file) as f: - data = f.read() - assert 'variables' not in data - data += '\nvariables: {modified: True}\n' - _, temppath = tempfile.mkstemp(dir=tmpdir, text=True) - with open(temppath, 'w') as f: - f.write(data) - - return temppath - - -@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs']) -@pytest.mark.parametrize('with_junction', ['no-junction', 'junction']) -@pytest.mark.parametrize('move_project', ['move', 'no-move']) -def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project): - # Generate the project - project = generate_project(str(tmpdir), ref_storage, with_junction) - if with_junction == 'junction': - result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst']) - result.assert_success() - - # bst show to put it in the cache - result = cli.run(project=project, args=['show', 'test.bst']) - result.assert_success() - - element_path = os.path.join(project, 'elements', 'test.bst') - with with_yamlcache(project) as (yc, prj): - # Check that it's in the cache - assert yc.is_cached(prj, element_path) - - # *Absolutely* horrible cache corruption to check it's being used - # Modifying the data from the cache is fraught with danger, - # so instead I'll load a modified version of the original file - temppath = modified_file(element_path, str(tmpdir)) - contents = _yaml.load(temppath, copy_tree=False, project=prj) - key = yamlcache_key(yc, element_path) - yc.put_from_key(prj, element_path, key, contents) - - # Show that a variable has been added - result = cli.run(project=project, args=['show', '--deps', 'none', '--format', '%{vars}', 'test.bst']) - result.assert_success() - data = yaml.safe_load(result.output) - assert 'modified' in data - assert data['modified'] == 'True' - - -@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs']) -@pytest.mark.parametrize('with_junction', ['junction', 'no-junction']) -def test_yamlcache_changed_file(cli, tmpdir, ref_storage, with_junction): - # i.e. 
a file is cached, the file is changed, loading the file (with cache) returns new data - # inline and junction can only be changed by opening a workspace - # Generate the project - project = generate_project(str(tmpdir), ref_storage, with_junction) - if with_junction == 'junction': - result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst']) - result.assert_success() - - # bst show to put it in the cache - result = cli.run(project=project, args=['show', 'test.bst']) - result.assert_success() - - element_path = os.path.join(project, 'elements', 'test.bst') - with with_yamlcache(project) as (yc, prj): - # Check that it's in the cache then modify - assert yc.is_cached(prj, element_path) - with open(element_path, "a") as f: - f.write('\nvariables: {modified: True}\n') - # Load modified yaml cache file into cache - _yaml.load(element_path, copy_tree=False, project=prj, yaml_cache=yc) - - # Show that a variable has been added - result = cli.run(project=project, args=['show', '--deps', 'none', '--format', '%{vars}', 'test.bst']) - result.assert_success() - data = yaml.safe_load(result.output) - assert 'modified' in data - assert data['modified'] == 'True' diff --git a/tests/internals/yaml.py b/tests/internals/yaml.py index c505fbd20..86ae11d45 100644 --- a/tests/internals/yaml.py +++ b/tests/internals/yaml.py @@ -1,13 +1,10 @@ from collections.abc import Mapping import os -import tempfile import pytest from buildstream import _yaml from buildstream._exceptions import LoadError, LoadErrorReason -from buildstream._context import Context -from buildstream._yamlcache import YamlCache DATA_DIR = os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -154,23 +151,6 @@ def test_composite_preserve_originals(datafiles): assert _yaml.node_get(orig_extra, str, 'old') == 'new' -def load_yaml_file(filename, *, cache_path, shortname=None, from_cache='raw'): - - _, temppath = tempfile.mkstemp(dir=os.path.join(cache_path.dirname, cache_path.basename), text=True) - context = Context() - - with YamlCache.open(context, temppath) as yc: - if from_cache == 'raw': - return _yaml.load(filename, shortname) - elif from_cache == 'cached': - _yaml.load(filename, shortname, yaml_cache=yc) - return _yaml.load(filename, shortname, yaml_cache=yc) - else: - raise Exception( - "Invalid value for parameter 'from_cache', Expected 'raw' or 'cached'" - ) - - # Tests for list composition # # Each test composits a filename on top of basics.yaml, and tests @@ -223,8 +203,8 @@ def test_list_composition(datafiles, filename, tmpdir, base_file = os.path.join(datafiles.dirname, datafiles.basename, 'basics.yaml') overlay_file = os.path.join(datafiles.dirname, datafiles.basename, filename) - base = load_yaml_file(base_file, cache_path=tmpdir, shortname='basics.yaml', from_cache=caching) - overlay = load_yaml_file(overlay_file, cache_path=tmpdir, shortname=filename, from_cache=caching) + base = _yaml.load(base_file, 'basics.yaml') + overlay = _yaml.load(overlay_file, filename) _yaml.composite_dict(base, overlay) @@ -344,9 +324,9 @@ def test_list_composition_twice(datafiles, tmpdir, filename1, filename2, ##################### # Round 1 - Fight ! 
##################### - base = load_yaml_file(file_base, cache_path=tmpdir, shortname='basics.yaml', from_cache=caching) - overlay1 = load_yaml_file(file1, cache_path=tmpdir, shortname=filename1, from_cache=caching) - overlay2 = load_yaml_file(file2, cache_path=tmpdir, shortname=filename2, from_cache=caching) + base = _yaml.load(file_base, 'basics.yaml') + overlay1 = _yaml.load(file1, filename1) + overlay2 = _yaml.load(file2, filename2) _yaml.composite_dict(base, overlay1) _yaml.composite_dict(base, overlay2) @@ -361,9 +341,9 @@ def test_list_composition_twice(datafiles, tmpdir, filename1, filename2, ##################### # Round 2 - Fight ! ##################### - base = load_yaml_file(file_base, cache_path=tmpdir, shortname='basics.yaml', from_cache=caching) - overlay1 = load_yaml_file(file1, cache_path=tmpdir, shortname=filename1, from_cache=caching) - overlay2 = load_yaml_file(file2, cache_path=tmpdir, shortname=filename2, from_cache=caching) + base = _yaml.load(file_base, 'basics.yaml') + overlay1 = _yaml.load(file1, filename1) + overlay2 = _yaml.load(file2, filename2) _yaml.composite_dict(overlay1, overlay2) _yaml.composite_dict(base, overlay1) |
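
The most intricate part of the deleted `_yamlcache.py` is its pickling strategy: a live `Context` (and the project-carrying `ProvenanceFile`) cannot be stored in the cache file, so `BstPickler`/`BstUnpickler` serialised them as pickle *persistent IDs* and re-attached the caller's live objects on load. Below is a self-contained sketch of that technique, reduced to the `Context` case — the `Context` class here is a trivial stand-in, not BuildStream's.

```python
import io
import pickle


class Context:
    """Stand-in for BuildStream's Context, which must not end up in the cache file."""


class BstPickler(pickle.Pickler):
    # Replace unpicklable objects with a persistent-ID tuple; returning None
    # tells pickle to serialise everything else normally.
    def persistent_id(self, obj):
        if isinstance(obj, Context):
            return ("Context",)
        return None


class BstUnpickler(pickle.Unpickler):
    # Resolve the persistent ID back to the live object supplied by the
    # caller, so the cache never stores the Context itself.
    def __init__(self, file, context):
        super().__init__(file)
        self._context = context

    def persistent_load(self, pid):
        if pid[0] == "Context":
            return self._context
        raise pickle.UnpicklingError("Unsupported persistent object, {}".format(pid))


# Round-trip: the Context reference survives pickling by identity, not by value.
context = Context()
payload = {"node": {"kind": "junction"}, "ctx": context}

stream = io.BytesIO()
BstPickler(stream).dump(payload)
stream.seek(0)
restored = BstUnpickler(stream, context).load()
assert restored["ctx"] is context
```

Removing the cache removes this persistence machinery along with it, which is why the loader and `_yaml.load()` hunks above shrink to a plain parse path.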