summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Maw <jonathan.maw@codethink.co.uk> 2019-01-31 16:30:55 +0000
committer: Jonathan Maw <jonathan.maw@codethink.co.uk> 2019-02-18 18:12:35 +0000
commit: b67b8bba1d49b555af9a651c6a618bdfc2aba228 (patch)
tree: ad60b34cbb5fdc0e5eea54661e92ff1653df13fb
parent: 86466e7ed6bef1a3373f1d2091023adc1b549d35 (diff)
download: buildstream-jonathan/yamlcache-no-read.tar.gz
yamlcache: Lookup files in the cache without reading the file [jonathan/yamlcache-no-read]
For files that are not part of a junction (i.e. not checked-out into a temporary directory), use the mtime to see if the file has changed. For files that are, use the junction element's cache key.
-rw-r--r-- buildstream/_yaml.py 8
-rw-r--r-- buildstream/_yamlcache.py 26
-rw-r--r-- tests/frontend/yamlcache.py 32
3 files changed, 33 insertions, 33 deletions
diff --git a/buildstream/_yaml.py b/buildstream/_yaml.py
index 7e12183e3..f1d16e206 100644
--- a/buildstream/_yaml.py
+++ b/buildstream/_yaml.py
@@ -197,12 +197,12 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=
try:
data = None
- with open(filename) as f:
- contents = f.read()
- if yaml_cache:
- data, key = yaml_cache.get(project, filename, contents, copy_tree)
+ if yaml_cache:
+ data, key = yaml_cache.get(project, filename, copy_tree)
if not data:
+ with open(filename) as f:
+ contents = f.read()
data = load_data(contents, file, copy_tree=copy_tree)
if yaml_cache:
yaml_cache.put_from_key(project, filename, key, data)
diff --git a/buildstream/_yamlcache.py b/buildstream/_yamlcache.py
index 89117007b..07a1b8d5f 100644
--- a/buildstream/_yamlcache.py
+++ b/buildstream/_yamlcache.py
@@ -127,15 +127,14 @@ class YamlCache():
# Args:
# project (Project) or None: The project this file is in, if it exists.
# filepath (str): The absolute path to the file.
- # contents (str): The contents of the file to be cached
# copy_tree (bool): Whether the data should make a copy when it's being generated
# (i.e. exactly as when called in yaml)
#
# Returns:
# (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
# (str): The key used to look up the parsed yaml in the cache
- def get(self, project, filepath, contents, copy_tree):
- key = self._calculate_key(contents, copy_tree)
+ def get(self, project, filepath, copy_tree):
+ key = self._calculate_key(project, filepath, copy_tree)
data = self._get(project, filepath, key)
return data, key
@@ -146,12 +145,11 @@ class YamlCache():
# Args:
# project (Project): The project this file is in.
# filepath (str): The path to the file.
- # contents (str): The contents of the file that has been cached
# copy_tree (bool): Whether the data should make a copy when it's being generated
# (i.e. exactly as when called in yaml)
# value (decorated dict): The data to put into the cache.
- def put(self, project, filepath, contents, copy_tree, value):
- key = self._calculate_key(contents, copy_tree)
+ def put(self, project, filepath, copy_tree, value):
+ key = self._calculate_key(project, filepath, copy_tree)
self.put_from_key(project, filepath, key, value)
# put_from_key():
@@ -213,13 +211,23 @@ class YamlCache():
# Calculates a key for putting into the cache.
#
# Args:
- # (basic object)... : Any number of strictly-ordered basic objects
+ # project (Project) or None: The project this file is in.
+ # filepath (str): The path to the file.
+ # copy_tree (bool): Whether the data should make a copy when it's being generated
+ # (i.e. exactly as when called in yaml)
#
# Returns:
# (str): A key made out of every arg passed in
@staticmethod
- def _calculate_key(*args):
- string = pickle.dumps(args)
+ def _calculate_key(project, filepath, copy_tree):
+ if project and project.junction:
+ # files in a junction only change if the junction element changes
+ # NOTE: This may change when junction workspaces are revisited/fixed
+ content_key = project.junction._get_cache_key()
+ else:
+ stat = os.stat(filepath)
+ content_key = stat.st_mtime
+ string = pickle.dumps(content_key, copy_tree)
return hashlib.sha1(string).hexdigest()
# _get():
diff --git a/tests/frontend/yamlcache.py b/tests/frontend/yamlcache.py
index 99b5d71c3..5dc52d1a0 100644
--- a/tests/frontend/yamlcache.py
+++ b/tests/frontend/yamlcache.py
@@ -14,10 +14,10 @@ from contextlib import contextmanager
def generate_project(tmpdir, ref_storage, with_junction, name="test"):
- if with_junction == 'junction':
+ if with_junction:
subproject_dir = generate_project(
tmpdir, ref_storage,
- 'no-junction', name='test-subproject'
+ False, name='test-subproject'
)
project_dir = os.path.join(tmpdir, name)
@@ -33,7 +33,7 @@ def generate_project(tmpdir, ref_storage, with_junction, name="test"):
_yaml.dump(project_conf, project_conf_path)
# elements
- if with_junction == 'junction':
+ if with_junction:
junction_name = 'junction.bst'
junction_dir = os.path.join(project_dir, elements_path)
junction_path = os.path.join(project_dir, elements_path, junction_name)
@@ -58,12 +58,6 @@ def with_yamlcache(project_dir):
yield yamlcache, project
-def yamlcache_key(yamlcache, in_file, copy_tree=False):
- with open(in_file) as f:
- key = yamlcache._calculate_key(f.read(), copy_tree)
- return key
-
-
def modified_file(input_file, tmpdir):
with open(input_file) as f:
data = f.read()
@@ -77,12 +71,13 @@ def modified_file(input_file, tmpdir):
@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
-@pytest.mark.parametrize('with_junction', ['no-junction', 'junction'])
-@pytest.mark.parametrize('move_project', ['move', 'no-move'])
-def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
+@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
+def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction):
# Generate the project
project = generate_project(str(tmpdir), ref_storage, with_junction)
- if with_junction == 'junction':
+ element_path = os.path.join(project, 'elements', 'test.bst')
+ element_mtime = 0
+ if with_junction:
result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
result.assert_success()
@@ -90,17 +85,14 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
result = cli.run(project=project, args=['show', 'test.bst'])
result.assert_success()
- element_path = os.path.join(project, 'elements', 'test.bst')
with with_yamlcache(project) as (yc, prj):
# Check that it's in the cache
assert yc.is_cached(prj, element_path)
- # *Absolutely* horrible cache corruption to check it's being used
- # Modifying the data from the cache is fraught with danger,
- # so instead I'll load a modified version of the original file
+ # Modify files in the yaml cache to test whether it's being used
temppath = modified_file(element_path, str(tmpdir))
contents = _yaml.load(temppath, copy_tree=False, project=prj)
- key = yamlcache_key(yc, element_path)
+ key = yc._calculate_key(prj, element_path, copy_tree=False)
yc.put_from_key(prj, element_path, key, contents)
# Show that a variable has been added
@@ -112,13 +104,13 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
-@pytest.mark.parametrize('with_junction', ['junction', 'no-junction'])
+@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
def test_yamlcache_changed_file(cli, tmpdir, ref_storage, with_junction):
# i.e. a file is cached, the file is changed, loading the file (with cache) returns new data
# inline and junction can only be changed by opening a workspace
# Generate the project
project = generate_project(str(tmpdir), ref_storage, with_junction)
- if with_junction == 'junction':
+ if with_junction:
result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
result.assert_success()