diff options
 buildstream/_loader/loader.py |  31 ++-
 buildstream/_yaml.py          |  16 +-
 buildstream/_yamlcache.py     | 348 ++++++++++++++++++
 3 files changed, 382 insertions(+), 13 deletions(-)
diff --git a/buildstream/_loader/loader.py b/buildstream/_loader/loader.py index 1b27d9d55..1bdbca90f 100644 --- a/buildstream/_loader/loader.py +++ b/buildstream/_loader/loader.py @@ -29,6 +29,7 @@ from .. import _yaml from ..element import Element from .._profile import Topics, profile_start, profile_end from .._includes import Includes +from .._yamlcache import YamlCache from .types import Symbol, Dependency from .loadelement import LoadElement @@ -108,13 +109,19 @@ class Loader(): # deps = [] - for target in targets: - profile_start(Topics.LOAD_PROJECT, target) - junction, name, loader = self._parse_name(target, rewritable, ticker, - fetch_subprojects=fetch_subprojects) - loader._load_file(name, rewritable, ticker, fetch_subprojects) - deps.append(Dependency(name, junction=junction)) - profile_end(Topics.LOAD_PROJECT, target) + # XXX This will need to be changed to the context's top-level project if this method + # is ever used for subprojects + top_dir = self.project.directory + + cache_file = YamlCache.get_cache_file(top_dir) + with YamlCache.open(self._context, cache_file) as yaml_cache: + for target in targets: + profile_start(Topics.LOAD_PROJECT, target) + junction, name, loader = self._parse_name(target, rewritable, ticker, + fetch_subprojects=fetch_subprojects) + loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache) + deps.append(Dependency(name, junction=junction)) + profile_end(Topics.LOAD_PROJECT, target) # # Now that we've resolve the dependencies, scan them for circular dependencies @@ -201,11 +208,12 @@ class Loader(): # rewritable (bool): Whether we should load in round trippable mode # ticker (callable): A callback to report loaded filenames to the frontend # fetch_subprojects (bool): Whether to fetch subprojects while loading + # yaml_cache (YamlCache): A yaml cache # # Returns: # (LoadElement): A loaded LoadElement # - def _load_file(self, filename, rewritable, ticker, fetch_subprojects): + def _load_file(self, filename, 
# load()
#
# Loads a dictionary from some YAML, consulting the yaml cache (if given)
# before falling back to parsing the file contents.
#
# Args:
#    filename (str): The YAML file to load
#    shortname (str): The filename in shorthand for error reporting (or None)
#    copy_tree (bool): Whether to make a copy, preserving the original toplevels
#                      for later serialization
#    project (Project): The project this file belongs to (or None)
#    yaml_cache (YamlCache): A yaml cache to consult rather than parsing
#
# Returns (dict): A loaded copy of the YAML file with provenance information
#
# Raises: LoadError
#
def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
    if not shortname:
        shortname = filename

    file = ProvenanceFile(filename, shortname, project)

    try:
        data = None
        key = None
        with open(filename) as f:
            contents = f.read()

        if yaml_cache:
            data, key = yaml_cache.get(project, filename, contents, copy_tree)

        # Compare against None explicitly: a cached document that happens to
        # be falsy (e.g. an empty mapping) is still a valid cache hit and
        # must not be re-parsed.
        if data is None:
            data = load_data(contents, file, copy_tree=copy_tree)

            # Only write back on a cache miss; re-inserting on a hit would
            # needlessly re-pickle the contents in put_from_key().
            if yaml_cache:
                yaml_cache.put_from_key(project, filename, key, data)

        return data
    except FileNotFoundError as e:
        raise LoadError(LoadErrorReason.MISSING_FILE,
                        "Could not find file at {}".format(filename)) from e
# YamlCache()
#
# A cache that wraps around the loading of yaml in projects.
#
# The recommended way to use a YamlCache is:
#   with YamlCache.open(context, cachefile) as yamlcache:
#     # Load all the yaml
#     ...
#
# Args:
#    context (Context): The invocation Context
#
class YamlCache():

    def __init__(self, context):
        # Maps project names to CachedProject entries
        self._project_caches = {}
        self._context = context

    ##################
    # Public Methods #
    ##################

    # is_cached():
    #
    # Checks whether a file is cached.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The absolute path to the file (it is made
    #                    project-relative internally).
    #
    # Returns:
    #    (bool): Whether the file is cached.
    def is_cached(self, project, filepath):
        cache_path = self._get_filepath(project, filepath)
        project_name = project.name if project else ""
        try:
            project_cache = self._project_caches[project_name]
        except KeyError:
            return False
        return cache_path in project_cache.elements

    # open():
    #
    # Return an instance of the YamlCache which writes to disk when it leaves scope.
    #
    # Args:
    #    context (Context): The context.
    #    cachefile (str): The path to the cache file.
    #
    # Returns:
    #    (YamlCache): A YamlCache.
    @staticmethod
    @contextmanager
    def open(context, cachefile):
        # Try to load from disk first
        cache = None
        if os.path.exists(cachefile):
            try:
                with open(cachefile, "rb") as f:
                    cache = BstUnpickler(f, context).load()
            except EOFError:
                # The file was empty
                pass
            except pickle.UnpicklingError as e:
                # A corrupt cache is not fatal; fall through and rebuild it
                sys.stderr.write("Failed to load YamlCache, {}\n".format(e))

        # Failed to load from disk, create a new one
        if not cache:
            cache = YamlCache(context)

        yield cache

        cache._write(cachefile)

    # get_cache_file():
    #
    # Retrieves a path to the yaml cache file.
    #
    # Args:
    #    top_dir (str): The project's top-level directory.
    #
    # Returns:
    #    (str): The path to the cache file
    @staticmethod
    def get_cache_file(top_dir):
        return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME)

    # get():
    #
    # Gets a parsed file from the cache.
    #
    # Args:
    #    project (Project) or None: The project this file is in, if it exists.
    #    filepath (str): The absolute path to the file.
    #    contents (str): The contents of the file to be cached
    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    #                      (i.e. exactly as when called in yaml)
    #
    # Returns:
    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    #    (str): The key used to look up the parsed yaml in the cache
    def get(self, project, filepath, contents, copy_tree):
        key = self._calculate_key(contents, copy_tree)
        data = self._get(project, filepath, key)
        return data, key

    # put():
    #
    # Puts a parsed file into the cache.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The absolute path to the file.
    #    contents (str): The contents of the file that has been cached
    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    #                      (i.e. exactly as when called in yaml)
    #    value (decorated dict): The data to put into the cache.
    def put(self, project, filepath, contents, copy_tree, value):
        key = self._calculate_key(contents, copy_tree)
        self.put_from_key(project, filepath, key, value)

    # put_from_key():
    #
    # Put a parsed file into the cache when given a key.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The absolute path to the file.
    #    key (str): The key to the file within the cache. Typically, this is the
    #               value of `_calculate_key()` with the file's unparsed contents
    #               and any relevant metadata passed in.
    #    value (decorated dict): The data to put into the cache.
    def put_from_key(self, project, filepath, key, value):
        cache_path = self._get_filepath(project, filepath)
        project_name = project.name if project else ""
        try:
            project_cache = self._project_caches[project_name]
        except KeyError:
            project_cache = self._project_caches[project_name] = CachedProject({})

        project_cache.elements[cache_path] = CachedYaml(key, value)

    ###################
    # Private Methods #
    ###################

    # _write():
    #
    # Writes the yaml cache to the specified path.
    #
    # Args:
    #    path (str): The path to the cache file.
    def _write(self, path):
        parent_dir = os.path.dirname(path)
        os.makedirs(parent_dir, exist_ok=True)
        with open(path, "wb") as f:
            BstPickler(f).dump(self)

    # _get_filepath():
    #
    # Returns a file path relative to a project if passed, or the original path if
    # the project is None
    #
    # Args:
    #    project (Project) or None: The project the filepath exists within
    #    full_path (str): The path that the returned path is based on
    #
    # Returns:
    #    (str): The path to the file, relative to a project if it exists
    def _get_filepath(self, project, full_path):
        if project:
            assert full_path.startswith(project.directory)
            # BUGFIX: previously returned full_path, discarding the computed
            # relative path. Cache keys must be project-relative so the cache
            # (and the relative names BstPickler stores) survive a project
            # directory being moved.
            return os.path.relpath(full_path, project.directory)
        return full_path

    # _calculate_key():
    #
    # Calculates a key for putting into the cache.
    #
    # Args:
    #    (basic object)... : Any number of strictly-ordered basic objects
    #
    # Returns:
    #    (str): A key made out of every arg passed in
    @staticmethod
    def _calculate_key(*args):
        string = pickle.dumps(args)
        return hashlib.sha1(string).hexdigest()

    # _get():
    #
    # Gets a parsed file from the cache when given a key.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The absolute path to the file.
    #    key (str): The key to the file within the cache. Typically, this is the
    #               value of `_calculate_key()` with the file's unparsed contents
    #               and any relevant metadata passed in.
    #
    # Returns:
    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    def _get(self, project, filepath, key):
        cache_path = self._get_filepath(project, filepath)
        project_name = project.name if project else ""
        try:
            cachedyaml = self._project_caches[project_name].elements[cache_path]
        except KeyError:
            return None

        if cachedyaml._key != key:
            return None

        # We've unpickled the YamlCache, but not the specific file
        if cachedyaml._contents is None:
            cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context)
        return cachedyaml._contents
# A project's cache entries, keyed by project-relative file path.
CachedProject = namedtuple('CachedProject', ['elements'])


class CachedYaml():
    def __init__(self, key, contents):
        self._key = key
        self.set_contents(contents)

    # Sets the contents of the CachedYaml.
    #
    # Args:
    #    contents (provenanced dict): The contents to put in the cache.
    #
    def set_contents(self, contents):
        self._contents = contents
        self._pickled_contents = BstPickler.dumps(contents)

    # Pickling helper method, prevents 'contents' from being serialised
    def __getstate__(self):
        data = self.__dict__.copy()
        data['_contents'] = None
        return data


# In _yaml.load, we have a ProvenanceFile that stores the project the file
# came from. Projects can't be pickled, but it's always going to be the same
# project between invocations (unless the entire project is moved but the
# file stayed in the same place)
class BstPickler(pickle.Pickler):
    def persistent_id(self, obj):
        if isinstance(obj, _yaml.ProvenanceFile):
            if obj.project:
                # ProvenanceFile's project object cannot be stored as it is.
                project_tag = obj.project.name
                # ProvenanceFile's filename must be stored relative to the
                # project, as the project dir may move.
                name = os.path.relpath(obj.name, obj.project.directory)
            else:
                project_tag = None
                name = obj.name
            return ("ProvenanceFile", name, obj.shortname, project_tag)
        elif isinstance(obj, Context):
            return ("Context",)
        else:
            # Fall back to normal pickling
            return None

    @staticmethod
    def dumps(obj):
        stream = io.BytesIO()
        BstPickler(stream).dump(obj)
        stream.seek(0)
        return stream.read()


class BstUnpickler(pickle.Unpickler):
    def __init__(self, file, context):
        super().__init__(file)
        self._context = context

    def persistent_load(self, pid):
        if pid[0] == "ProvenanceFile":
            _, tagged_name, shortname, project_tag = pid

            if project_tag is not None:
                # BUGFIX: 'project' was unbound (NameError) when no project
                # matched, project.directory was dereferenced before the
                # not-found check, and the error message formatted an
                # undefined name ('key_id' instead of 'project_tag').
                project = None
                for p in self._context.get_projects():
                    if project_tag == p.name:
                        project = p
                        break

                if project is None:
                    projects = [p.name for p in self._context.get_projects()]
                    raise pickle.UnpicklingError("No project with name {} found in {}"
                                                 .format(project_tag, projects))

                name = os.path.join(project.directory, tagged_name)
            else:
                project = None
                name = tagged_name

            return _yaml.ProvenanceFile(name, shortname, project)
        elif pid[0] == "Context":
            return self._context
        else:
            raise pickle.UnpicklingError("Unsupported persistent object, {}".format(pid))

    @staticmethod
    def loads(text, context):
        stream = io.BytesIO()
        stream.write(bytes(text))
        stream.seek(0)
        return BstUnpickler(stream, context).load()