From 3efb9f405ae8f1dafcfbb7f06e009f0a6b054f10 Mon Sep 17 00:00:00 2001
From: Daniel Silverstone
Date: Fri, 28 Mar 2014 14:08:27 +0000
Subject: CacheKeyComputer: Memoise cache keys more effectively.

We were previously memoising the computation of the dictionaries but
this patch adds support for memoising the computation of the cache key
itself (the SHA string). This massively improves cache key computation
performance.

Signed-Off-By: Daniel Silverstone
---
 morphlib/cachekeycomputer.py | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

(limited to 'morphlib/cachekeycomputer.py')

diff --git a/morphlib/cachekeycomputer.py b/morphlib/cachekeycomputer.py
index 3efe1cbb..ca374436 100644
--- a/morphlib/cachekeycomputer.py
+++ b/morphlib/cachekeycomputer.py
@@ -25,6 +25,7 @@ class CacheKeyComputer(object):
     def __init__(self, build_env):
         self._build_env = build_env
         self._calculated = {}
+        self._hashed = {}
 
     def _filterenv(self, env):
         keys = ["LOGNAME", "MORPH_ARCH", "TARGET", "TARGET_STAGE1",
@@ -32,11 +33,18 @@ class CacheKeyComputer(object):
         return dict([(k, env[k]) for k in keys])
 
     def compute_key(self, artifact):
-        logging.debug('computing cache key for artifact %s from source '
-                      'repo %s, sha1 %s, filename %s' %
-                      (artifact.name, artifact.source.repo_name,
-                       artifact.source.sha1, artifact.source.filename))
-        return self._hash_id(self.get_cache_id(artifact))
+        try:
+            ret = self._hashed[artifact]
+            logging.debug('returning cached key for artifact %s from source ',
+                          (artifact.name, artifact.source.repo_name,
+                           artifact.source.sha1, artifact.source.filename))
+            return ret
+        except KeyError:
+            logging.debug('computing cache key for artifact %s from source ',
+                          (artifact.name, artifact.source.repo_name,
+                           artifact.source.sha1, artifact.source.filename))
+            self._hashed[artifact] = self._hash_id(self.get_cache_id(artifact))
+            return self._hashed[artifact]
 
     def _hash_id(self, cache_id):
         sha = hashlib.sha256()
@@ -66,13 +74,18 @@ class CacheKeyComputer(object):
             self._hash_thing(sha, item)
 
     def get_cache_id(self, artifact):
-        logging.debug('computing cache id for artifact %s from source '
-                      'repo %s, sha1 %s, filename %s' %
-                      (artifact.name, artifact.source.repo_name,
-                       artifact.source.sha1, artifact.source.filename))
         try:
-            return self._calculated[artifact]
+            ret = self._calculated[artifact]
+            logging.debug('returning cached id for artifact %s from source '
+                          'repo %s, sha1 %s, filename %s' %
+                          (artifact.name, artifact.source.repo_name,
+                           artifact.source.sha1, artifact.source.filename))
+            return ret
         except KeyError:
+            logging.debug('computing cache id for artifact %s from source '
+                          'repo %s, sha1 %s, filename %s' %
+                          (artifact.name, artifact.source.repo_name,
+                           artifact.source.sha1, artifact.source.filename))
             cacheid = self._calculate(artifact)
             self._calculated[artifact] = cacheid
             return cacheid
-- 
cgit v1.2.1