From 670bcd28354ed2d44c02eaa0d0d1a78f6c49458d Mon Sep 17 00:00:00 2001 From: Richard Maw Date: Wed, 18 Jun 2014 14:07:14 +0000 Subject: Don't check if a file exists before trying to read it We used to check whether a file existed before trying to read it. We used to be able to get away with only looking at the top-level directory, so listing the files with ls-files before trying to cat-file one of them was the better approach. Unfortunately, we need to look at the files in subdirectories now, so this no longer works. We could make the listing include files in subdirectories, but for repositories with many files, you would end up reading a file listing longer than the morphology, so even in the slow case of needing to read the entire morphology file, it would be faster to attempt to read the file first. So now we beg forgiveness rather than asking permission. --- morphlib/morphologyfactory.py | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/morphlib/morphologyfactory.py b/morphlib/morphologyfactory.py index cd59d4ec..1cde2c77 100644 --- a/morphlib/morphologyfactory.py +++ b/morphlib/morphologyfactory.py @@ -14,6 +14,8 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+import os + import morphlib import cliapp @@ -74,43 +76,41 @@ class MorphologyFactory(object): self._app.status(*args, **kwargs) def _get_morphology_text(self, reponame, sha1, filename): + morph_name = os.path.splitext(os.path.basename(filename))[0] if self._lrc.has_repo(reponame): self.status(msg="Looking for %s in local repo cache" % filename, chatty=True) - repo = self._lrc.get_repo(reponame) - file_list = repo.ls_tree(sha1) - - if filename in file_list: - return repo.cat(sha1, filename) + try: + repo = self._lrc.get_repo(reponame) + text = repo.cat(sha1, filename) + except IOError: + text = None + file_list = repo.ls_tree(sha1) elif self._rrc is not None: - self.status(msg="Looking for %s in remote repo cache" % filename, + self.status(msg="Retrieving %(reponame)s %(sha1)s %(filename)s" + " from the remote artifact cache.", + reponame=reponame, sha1=sha1, filename=filename, chatty=True) - file_list = self._rrc.ls_tree(reponame, sha1) - - if filename in file_list: - self.status(msg='Retrieving %s %s %s' - 'from the remote artifact cache.' 
- % (reponame, sha1, filename), chatty=True) - return self._rrc.cat_file(reponame, sha1, filename) + try: + text = self._rrc.cat_file(reponame, sha1, filename) + except morphlib.remoterepocache.CatFileError: + text = None + file_list = self._rrc.ls_tree(reponame, sha1) else: raise NotcachedError(reponame) - self.status(msg="File %s doesn't exist: " - "attempting to infer chunk morph from repo's build system" - % filename, chatty=True) - bs = morphlib.buildsystem.detect_build_system(file_list) - if bs is None: - raise MorphologyNotFoundError(filename) - # TODO consider changing how morphs are located to be by morph - # name rather than filename, it would save creating a - # filename only to strip it back to its morph name again - # and would allow future changes like morphologies being - # stored as git metadata instead of as a file in the repo - morph_name = filename[:-len('.morph')] - return bs.get_morphology_text(morph_name) + if text is None: + self.status(msg="File %s doesn't exist: attempting to infer " + "chunk morph from repo's build system" + % filename, chatty=True) + bs = morphlib.buildsystem.detect_build_system(file_list) + if bs is None: + raise MorphologyNotFoundError(filename) + text = bs.get_morphology_text(morph_name) + return morph_name, text def get_morphology(self, reponame, sha1, filename): - text = self._get_morphology_text(reponame, sha1, filename) + morph_name, text = self._get_morphology_text(reponame, sha1, filename) try: morphology = morphlib.morph2.Morphology(text) @@ -118,7 +118,7 @@ class MorphologyFactory(object): raise morphlib.Error("Error parsing %s: %s" % (filename, str(e))) - if filename != morphology['name'] + '.morph': + if morph_name != morphology['name']: raise morphlib.Error( "Name %s does not match basename of morphology file %s" % (morphology['name'], filename)) -- cgit v1.2.1