From b8939e3f9abb9c3640a3817247c72af29acbbf0e Mon Sep 17 00:00:00 2001 From: Jonathan Maw Date: Mon, 8 Oct 2018 17:27:37 +0100 Subject: SQUASHME: yamlcache: hide key calculation and fix public/private Previously, key calculation had to be done manually, creating unnecessary boilerplate. This has now been subsumed into YamlCache.get and YamlCache.put (Yamlcache._get and Yamlcache._put_from_key exist for old behaviour). YamlCache.write is now private. Private methods have been put at the end of the class. --- buildstream/_loader/loader.py | 25 +++-- buildstream/_yaml.py | 5 +- buildstream/_yamlcache.py | 220 ++++++++++++++++++++++++++++-------------- tests/frontend/yamlcache.py | 13 +-- 4 files changed, 166 insertions(+), 97 deletions(-) diff --git a/buildstream/_loader/loader.py b/buildstream/_loader/loader.py index 984ca87f2..1bdbca90f 100644 --- a/buildstream/_loader/loader.py +++ b/buildstream/_loader/loader.py @@ -109,20 +109,19 @@ class Loader(): # deps = [] - for target in targets: - profile_start(Topics.LOAD_PROJECT, target) - junction, name, loader = self._parse_name(target, rewritable, ticker, - fetch_subprojects=fetch_subprojects) - - # XXX This will need to be changed to the context's top-level project if this method - # is ever used for subprojects - top_dir = self.project.directory - - cache_file = YamlCache.get_cache_file(top_dir) - with YamlCache.open(self._context, cache_file) as yaml_cache: + # XXX This will need to be changed to the context's top-level project if this method + # is ever used for subprojects + top_dir = self.project.directory + + cache_file = YamlCache.get_cache_file(top_dir) + with YamlCache.open(self._context, cache_file) as yaml_cache: + for target in targets: + profile_start(Topics.LOAD_PROJECT, target) + junction, name, loader = self._parse_name(target, rewritable, ticker, + fetch_subprojects=fetch_subprojects) loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache) - deps.append(Dependency(name, junction=junction)) - profile_end(Topics.LOAD_PROJECT, target) + deps.append(Dependency(name, junction=junction)) + profile_end(Topics.LOAD_PROJECT, target) # # Now that we've resolve the dependencies, scan them for circular dependencies diff --git a/buildstream/_yaml.py b/buildstream/_yaml.py index de2ef0dd2..e24d482f0 100644 --- a/buildstream/_yaml.py +++ b/buildstream/_yaml.py @@ -200,14 +200,13 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache= with open(filename) as f: contents = f.read() if yaml_cache: - key = yaml_cache.calculate_key(contents, copy_tree) - data = yaml_cache.get(project, filename, key) + data, key = yaml_cache.get(project, filename, contents, copy_tree) if not data: data = load_data(contents, file, copy_tree=copy_tree) if yaml_cache: - yaml_cache.put(project, filename, key, data) + yaml_cache.put_from_key(project, filename, key, data) return data except FileNotFoundError as e: diff --git a/buildstream/_yamlcache.py b/buildstream/_yamlcache.py index 12fb94034..39b24cccc 100644 --- a/buildstream/_yamlcache.py +++ b/buildstream/_yamlcache.py @@ -53,71 +53,20 @@ class YamlCache(): self._project_caches = {} self._context = context - # Writes the yaml cache to the specified path. - # - # Args: - # path (str): The path to the cache file. - def write(self, path): - parent_dir = os.path.dirname(path) - os.makedirs(parent_dir, exist_ok=True) - with open(path, "wb") as f: - BstPickler(f).dump(self) + ################## + # Public Methods # + ################## - # Gets a parsed file from the cache. + # is_cached(): # - # Args: - # project (Project): The project this file is in. - # filepath (str): The path to the file. - # key (str): The key to the file within the cache. Typically, this is the - # value of `calculate_key()` with the file's unparsed contents - # and any relevant metadata passed in. - # - # Returns: - # (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache. - def get(self, project, filepath, key): - cache_path = self._get_filepath(project, filepath) - project_name = project.name if project else "" - try: - project_cache = self._project_caches[project_name] - try: - cachedyaml = project_cache.elements[cache_path] - if cachedyaml._key == key: - # We've unpickled the YamlCache, but not the specific file - if cachedyaml._contents is None: - cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context) - return cachedyaml._contents - except KeyError: - pass - except KeyError: - pass - return None - - # Put a parsed file into the cache. + # Checks whether a file is cached. # # Args: # project (Project): The project this file is in. - # filepath (str): The path to the file. - # key (str): The key to the file within the cache. Typically, this is the - # value of `calculate_key()` with the file's unparsed contents - # and any relevant metadata passed in. - # value (decorated dict): The data to put into the cache. - def put(self, project, filepath, key, value): - cache_path = self._get_filepath(project, filepath) - project_name = project.name if project else "" - try: - project_cache = self._project_caches[project_name] - except KeyError: - project_cache = self._project_caches[project_name] = CachedProject({}) - - project_cache.elements[cache_path] = CachedYaml(key, value) - - # Checks whether a file is cached - # Args: - # project (Project): The project this file is in. # filepath (str): The path to the file, *relative to the project's directory*. # # Returns: - # (bool): Whether the file is cached + # (bool): Whether the file is cached. def is_cached(self, project, filepath): cache_path = self._get_filepath(project, filepath) project_name = project.name if project else "" @@ -129,14 +78,8 @@ class YamlCache(): pass return False - def _get_filepath(self, project, full_path): - if project: - assert full_path.startswith(project.directory) - filepath = os.path.relpath(full_path, project.directory) - else: - filepath = full_path - return full_path - + # open(): + # # Return an instance of the YamlCache which writes to disk when it leaves scope. # # Args: @@ -154,6 +97,9 @@ class YamlCache(): try: with open(cachefile, "rb") as f: cache = BstUnpickler(f, context).load() + except EOFError: + # The file was empty + pass except pickle.UnpicklingError as e: sys.stderr.write("Failed to load YamlCache, {}\n".format(e)) @@ -163,18 +109,150 @@ class YamlCache(): yield cache - cache.write(cachefile) + cache._write(cachefile) + # get_cache_file(): + # + # Retrieves a path to the yaml cache file. + # + # Returns: + # (str): The path to the cache file + @staticmethod + def get_cache_file(top_dir): + return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME) + + # get(): + # + # Gets a parsed file from the cache. + # + # Args: + # project (Project) or None: The project this file is in, if it exists. + # filepath (str): The absolute path to the file. + # contents (str): The contents of the file to be cached + # copy_tree (bool): Whether the data should make a copy when it's being generated + # (i.e. exactly as when called in yaml) + # + # Returns: + # (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache. + # (str): The key used to look up the parsed yaml in the cache + def get(self, project, filepath, contents, copy_tree): + key = self._calculate_key(contents, copy_tree) + data = self._get(project, filepath, key) + return data, key + + # put(): + # + # Puts a parsed file into the cache. + # + # Args: + # project (Project): The project this file is in. + # filepath (str): The path to the file. + # contents (str): The contents of the file that has been cached + # copy_tree (bool): Whether the data should make a copy when it's being generated + # (i.e. exactly as when called in yaml) + # value (decorated dict): The data to put into the cache. + def put(self, project, filepath, contents, copy_tree, value): + key = self._calculate_key(contents, copy_tree) + self.put_from_key(project, filepath, key, value) + + # put_from_key(): + # + # Put a parsed file into the cache when given a key. + # + # Args: + # project (Project): The project this file is in. + # filepath (str): The path to the file. + # key (str): The key to the file within the cache. Typically, this is the + # value of `calculate_key()` with the file's unparsed contents + # and any relevant metadata passed in. + # value (decorated dict): The data to put into the cache. + def put_from_key(self, project, filepath, key, value): + cache_path = self._get_filepath(project, filepath) + project_name = project.name if project else "" + try: + project_cache = self._project_caches[project_name] + except KeyError: + project_cache = self._project_caches[project_name] = CachedProject({}) + + project_cache.elements[cache_path] = CachedYaml(key, value) + + ################### + # Private Methods # + ################### + + # Writes the yaml cache to the specified path. + # + # Args: + # path (str): The path to the cache file. + def _write(self, path): + parent_dir = os.path.dirname(path) + os.makedirs(parent_dir, exist_ok=True) + with open(path, "wb") as f: + BstPickler(f).dump(self) + + # _get_filepath(): + # + # Returns a file path relative to a project if passed, or the original path if + # the project is None + # + # Args: + # project (Project) or None: The project the filepath exists within + # full_path (str): The path that the returned path is based on + # + # Returns: + # (str): The path to the file, relative to a project if it exists + def _get_filepath(self, project, full_path): + if project: + assert full_path.startswith(project.directory) + filepath = os.path.relpath(full_path, project.directory) + else: + filepath = full_path + return full_path + + # _calculate_key(): + # # Calculates a key for putting into the cache. + # + # Args: + # (basic object)... : Any number of strictly-ordered basic objects + # + # Returns: + # (str): A key made out of every arg passed in @staticmethod - def calculate_key(*args): + def _calculate_key(*args): string = pickle.dumps(args) return hashlib.sha1(string).hexdigest() - # Retrieves a path to the yaml cache file. - @staticmethod - def get_cache_file(top_dir): - return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME) + # _get(): + # + # Gets a parsed file from the cache when given a key. + # + # Args: + # project (Project): The project this file is in. + # filepath (str): The path to the file. + # key (str): The key to the file within the cache. Typically, this is the + # value of `calculate_key()` with the file's unparsed contents + # and any relevant metadata passed in. + # + # Returns: + # (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache. + def _get(self, project, filepath, key): + cache_path = self._get_filepath(project, filepath) + project_name = project.name if project else "" + try: + project_cache = self._project_caches[project_name] + try: + cachedyaml = project_cache.elements[cache_path] + if cachedyaml._key == key: + # We've unpickled the YamlCache, but not the specific file + if cachedyaml._contents is None: + cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context) + return cachedyaml._contents + except KeyError: + pass + except KeyError: + pass + return None CachedProject = namedtuple('CachedProject', ['elements']) diff --git a/tests/frontend/yamlcache.py b/tests/frontend/yamlcache.py index aa1fba94d..0bcc423ec 100644 --- a/tests/frontend/yamlcache.py +++ b/tests/frontend/yamlcache.py @@ -59,7 +59,7 @@ def with_yamlcache(project_dir): def yamlcache_key(yamlcache, in_file, copy_tree=False): with open(in_file) as f: - key = yamlcache.calculate_key(f.read(), copy_tree) + key = yamlcache._calculate_key(f.read(), copy_tree) return key @@ -100,7 +100,7 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project): temppath = modified_file(element_path, str(tmpdir)) contents = _yaml.load(temppath, copy_tree=False, project=prj) key = yamlcache_key(yc, element_path) - yc.put(prj, element_path, key, contents) + yc.put_from_key(prj, element_path, key, contents) # Show that a variable has been added result = cli.run(project=project, args=['show', '--format', '%{vars}', 'test.bst']) @@ -114,13 +114,6 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project): @pytest.mark.parametrize('with_junction', ['junction', 'no-junction']) @pytest.mark.parametrize('move_project', ['move', 'no-move']) def test_yamlcache_changed_file(cli, ref_storage, with_junction, move_project): + # i.e. a file is cached, the file is changed, loading the file (with cache) returns new data # inline and junction can only be changed by opening a workspace pass - - -@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs']) -@pytest.mark.parametrize('with_junction', ['junction', 'no-junction']) -@pytest.mark.parametrize('move_project', ['move', 'no-move']) -def test_yamlcache_track_changed(cli, ref_storage, with_junction, move_project): - # Skip inline junction, tracking those is forbidden - pass -- cgit v1.2.1