From 25617bd6277e2ec44c8d6acd742280a2708a6a2c Mon Sep 17 00:00:00 2001 From: Tiago Gomes Date: Sun, 15 Nov 2015 18:14:42 +0000 Subject: Cease modifying the morphologies With `morph edit` removed, there is no need to load all the morphologies, check whether a chunk was `morph edited`, change the current ref to the build ref and write back the morphologies to a temporary branch. That is a lot of work, as code profiling demonstrated. With this patch applied, morph execution finishes around 10 seconds sooner on my machine. This is not a big achievement when a full build is performed, where the wall-clock time is dictated by the actual build commands, but it will provide much quicker feedback when the build artifacts are already cached, or when the semantic validation of morphologies fails. We add the option `--untracked-files` to _get_status() in the GitIndex class so that uncommitted morphologies in an uncommitted directory are considered. Previously this was done by the following call in inject_build_refs(): self._root_index.add_files_from_index_info( self._hash_morphologies(self._root, morphs.morphologies)) This commit also removes some now unused code. 
Change-Id: I14215db5c06ab06045ce901131e4e341271a039d --- morphlib/buildbranch.py | 80 +--------------------------------------------- morphlib/gitindex.py | 35 ++------------------ morphlib/gitindex_tests.py | 10 ------ 3 files changed, 3 insertions(+), 122 deletions(-) (limited to 'morphlib') diff --git a/morphlib/buildbranch.py b/morphlib/buildbranch.py index c1d15874..9937f9ca 100644 --- a/morphlib/buildbranch.py +++ b/morphlib/buildbranch.py @@ -151,76 +151,6 @@ class BuildBranch(object): index.add_files_from_working_tree(changed) return changes_made - def _hash_morphologies(self, gd, morphologies): - '''Hash morphologies and return object info''' - loader = self.get_morphology_loader() - for morphology in morphologies: - loader.unset_defaults(morphology) - sha1 = gd.store_blob(loader.save_to_string(morphology)) - yield 0o100644, sha1, morphology.filename - - def get_morphology_loader(self): - if self._sb: - return self._sb.get_morphology_loader() - else: - return self._root.get_morphology_loader() - - def load_all_morphologies(self): - if self._sb: - return self._sb.load_all_morphologies() - else: - return self._root.load_all_morphologies() - - def inject_build_refs(self, use_local_repos, inject_cb=lambda **kwargs: - None): - '''Update system and stratum morphologies to point to our branch. - - For all edited repositories, this alter the temporary GitIndex - of the morphs repositories to point their temporary build branch - versions. - - ''' - root_repo = self._root.remote_url - root_ref = self._root.HEAD - morphs = morphlib.morphset.MorphologySet() - - for morph in self.load_all_morphologies(): - morphs.add_morphology(morph) - - sb_info = {} - for gd, (build_ref, index) in self._to_push.iteritems(): - if gd == self._root: - repo, ref = root_repo, root_ref - else: - # This branch can only run if we are in a Morph system branch - # checkout, because only there will we consider chunk repos. 
- repo, ref = gd.get_config('morph.repository'), gd.HEAD - sb_info[repo, ref] = (gd, build_ref) - - def filter(m, kind, spec): - return (spec.get('repo'), spec.get('ref')) in sb_info - def process(m, kind, spec): - repo, ref = spec['repo'], spec['ref'] - gd, build_ref = sb_info[repo, ref] - if (repo, ref) == (root_repo, root_ref): - spec['repo'] = None - spec['ref'] = None - return True - if use_local_repos: - spec['repo'] = urlparse.urljoin('file://', gd.dirname) - spec['ref'] = build_ref - return True - - morphs.traverse_specs(process, filter) - - if any(m.dirty for m in morphs.morphologies): - inject_cb(gd=self._root) - - # TODO: Prevent it hashing unchanged morphologies, while still - # hashing uncommitted ones. - self._root_index.add_files_from_index_info( - self._hash_morphologies(self._root, morphs.morphologies)) - def update_build_refs(self, name, email, uuid, commit_cb=lambda **kwargs: None): '''Commit changes in temporary GitIndexes to temporary branches. @@ -233,8 +163,7 @@ class BuildBranch(object): the repositories in the SystemBranch with: 1. The tree of anything currently in the temporary GitIndex. This is the same as the current commit on HEAD unless - `add_uncommitted_changes` or `inject_build_refs` have - been called. + `add_uncommitted_changes` has been called. 2. 
the parent of the previous temporary commit, or the last commit of the working tree if there has been no previous commits @@ -384,13 +313,6 @@ def pushed_build_branch(bb, changes_need_pushing, name, email, yield bb.root_repo_url, bb.root_commit, bb.root_ref return - def report_inject(gd): - status(msg='Injecting temporary build refs '\ - 'into morphologies in %(dirname)s', - dirname=gd.dirname, chatty=True) - bb.inject_build_refs(use_local_repos=not changes_need_pushing, - inject_cb=report_inject) - def report_commit(gd, build_ref): status(msg='Committing changes in %(dirname)s '\ 'to %(ref)s', diff --git a/morphlib/gitindex.py b/morphlib/gitindex.py index 00098fbf..c741718a 100644 --- a/morphlib/gitindex.py +++ b/morphlib/gitindex.py @@ -82,7 +82,8 @@ class GitIndex(object): # a space, then the path. # If our status code starts with R then it's a rename, hence # has a second path, requiring us to pop an extra token. - status = self._run_git('status', '-z', '--ignored') + status = self._run_git('status', '-z', '--ignored', + '--untracked-files') tokens = collections.deque(status.split('\0')) while True: tok = tokens.popleft() @@ -113,27 +114,6 @@ class GitIndex(object): '''Modify the index to contain the contents of the treeish.''' self._run_git('read-tree', treeish) - def add_files_from_index_info(self, infos): - '''Add files without interacting with the working tree. - - `infos` is an iterable of (file mode string, object sha1, path) - There are no constraints on the size of the iterable - - ''' - - # update-index may take NUL terminated input lines of the entries - # to add so we generate a string for the input, rather than - # having many command line arguments, since for a large amount - # of entries, this can be too many arguments to process and the - # exec will fail. - # Generating the input as a string uses more memory than using - # subprocess.Popen directly and using .communicate, but is much - # less verbose. 
- feed_stdin = '\0'.join('%o %s\t%s' % (mode, sha1, path) - for mode, sha1, path in infos) + '\0' - self._run_git('update-index', '--add', '-z', '--index-info', - feed_stdin=feed_stdin) - def add_files_from_working_tree(self, paths): '''Add existing files to the index. @@ -142,17 +122,6 @@ class GitIndex(object): add the contents of the files to git's object store, and the index. - This is similar to the following: - - gd = GitDirectory(...) - idx = gd.get_index() - for path in paths: - fullpath = os.path.join(gd,dirname, path) - with open(fullpath, 'r') as f: - sha1 = gd.store_blob(f) - idx.add_files_from_index_info([(os.stat(fullpath).st_mode, - sha1, path)]) - ''' if self._gd.is_bare(): diff --git a/morphlib/gitindex_tests.py b/morphlib/gitindex_tests.py index a3196764..60cbf6c8 100644 --- a/morphlib/gitindex_tests.py +++ b/morphlib/gitindex_tests.py @@ -66,16 +66,6 @@ class GitIndexTests(unittest.TestCase): idx.set_to_tree(gd.HEAD) self.assertEqual(list(idx.get_uncommitted_changes()),[]) - def test_add_files_from_index_info(self): - gd = morphlib.gitdir.GitDirectory(self.dirname) - idx = gd.get_index(os.path.join(self.tempdir, 'index')) - filepath = os.path.join(gd.dirname, 'foo') - with open(filepath, 'r') as f: - sha1 = gd.store_blob(f) - idx.add_files_from_index_info( - [(os.stat(filepath).st_mode, sha1, 'foo')]) - self.assertEqual(list(idx.get_uncommitted_changes()),[]) - def test_add_files_from_working_tree(self): gd = morphlib.gitdir.GitDirectory(self.dirname) idx = gd.get_index() -- cgit v1.2.1