From 15b9129ec639112e94ea96b6a395ad9b149515d1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 19:07:03 +0200 Subject: lazymixin system now supports per-attribute baking, it is up to the class whether it bakes more. This also leads to more efficient use of memory as values are only cached and set when required - the baking system does not require an own tracking variable anymore, and values are only to be cached once - then python will natively find the cache without involving any additional overhead. This works by using __getattr__ instead of __getattribute__ which would always be called --- lib/git/base.py | 96 ++++++++++++++++++++++++------------------------- lib/git/commit.py | 69 +++++++++++++++-------------------- lib/git/head.py | 14 -------- lib/git/tag.py | 52 +++++++++++++-------------- lib/git/tree.py | 25 ++++++------- test/git/test_commit.py | 3 -- test/git/test_tree.py | 2 +- 7 files changed, 114 insertions(+), 147 deletions(-) diff --git a/lib/git/base.py b/lib/git/base.py index 22c73491..f3510558 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -7,43 +7,38 @@ import os class LazyMixin(object): lazy_properties = [] + __slots__ = tuple() - __slots__ = "__baked__" - - def __init__(self): - self.__baked__ = False - - def __getattribute__(self, attr): - val = object.__getattribute__(self, attr) - if val is not None: - return val - else: - self.__prebake__() - return object.__getattribute__(self, attr) - - def __bake__(self): - """ This method should be overridden in the derived class. """ - raise NotImplementedError(" '__bake__' method has not been implemented.") - - def __prebake__(self): - if self.__baked__: - return - self.__bake__() - self.__baked__ = True + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is requested, it is simply + returned from our dict/slots. 
+ """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) - def __bake_it__(self): - self.__baked__ = True + def _set_cache_(self, attr): + """ This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + class Object(LazyMixin): """ Implements an Object which may be Blobs, Trees, Commits and Tags """ TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size", "_data_cached" ) + __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass - def __init__(self, repo, id, size=None): + def __init__(self, repo, id): """ Initialize an object by identifying it by its id. All keyword arguments will be set on demand if None. @@ -53,21 +48,32 @@ class Object(LazyMixin): ``id`` SHA1 or ref suitable for git-rev-parse - - ``size`` - Size of the object's data in bytes """ super(Object,self).__init__() self.repo = repo self.id = id - self.size = size - self._data_cached = type(None) - def __bake__(self): + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. 
+ + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): """ Retrieve object information """ - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + if attr == "size": + self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + elif attr == "data": + self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True) def __eq__(self, other): """ @@ -105,18 +111,12 @@ class Object(LazyMixin): return '<git.%s "%s">' % (self.__class__.__name__, self.id) @property - def data(self): + def id_abbrev(self): """ - The binary contents of this object. - Returns - str - - NOTE - The data will be cached after the first access. + First 7 bytes of the commit's sha id as an abbreviation of the full string. """ - self._data_cached = ( self._data_cached is not type(None) and self._data_cached ) or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) - return self._data_cached + return self.id[0:7] @classmethod def get_type_by_name(cls, object_type_name): @@ -154,7 +154,7 @@ class IndexObject(Object): """ __slots__ = ("path", "mode") - def __init__(self, repo, id, mode=None, path=None, size = None): + def __init__(self, repo, id, mode=None, path=None): """ Initialize a newly instanced IndexObject ``repo`` @@ -169,14 +169,11 @@ class IndexObject(Object): ``path`` : str is the path to the file in the file system, relative to the git repository root, i.e. 
file.ext or folder/other.ext - - ``size`` : int - size of the object data in bytes """ - super(IndexObject, self).__init__(repo, id, size) + super(IndexObject, self).__init__(repo, id) self.mode = mode self.path = path - + @property def basename(self): """ @@ -304,5 +301,6 @@ class Ref(object): git.Head """ full_path, hexsha, type_name, object_size = line.split("\x00") - obj = Object.get_type_by_name(type_name)(repo, hexsha, object_size) + obj = Object.get_type_by_name(type_name)(repo, hexsha) + obj.size = object_size return cls(full_path, obj) diff --git a/lib/git/commit.py b/lib/git/commit.py index 1ae84799..5d494621 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -25,6 +25,8 @@ class Commit(base.Object): # object configuration type = "commit" + __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", + "message", "parents") def __init__(self, repo, id, tree=None, author=None, authored_date=None, committer=None, committed_date=None, message=None, parents=None): @@ -38,7 +40,7 @@ class Commit(base.Object): is the sha id of the commit ``parents`` : list( Commit, ... ) - is a list of commit ids + is a list of commit ids or actual Commits ``tree`` : Tree is the corresponding tree id @@ -61,49 +63,34 @@ class Commit(base.Object): Returns git.Commit """ - super(Commit,self).__init__(repo, id, "commit") - self.parents = None - self.tree = None - self.author = author - self.authored_date = authored_date - self.committer = committer - self.committed_date = committed_date - self.message = message - - if self.id: - if parents is not None: - self.parents = [Commit(repo, p) for p in parents] - if tree is not None: - self.tree = Tree(repo, id=tree) - - def __eq__(self, other): - return self.id == other.id - - def __ne__(self, other): - return self.id != other.id - - def __bake__(self): - """ - Called by LazyMixin superclass when the first uninitialized member needs - to be set as it is queried. 
- """ - super(Commit, self).__bake__() - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.committer = temp.committer - self.committed_date = temp.committed_date - self.message = temp.message + super(Commit,self).__init__(repo, id) + self._set_self_from_args_(locals()) - @property - def id_abbrev(self): + if parents is not None: + self.parents = tuple( self.__class__(repo, p) for p in parents ) + # END for each parent to convert + + if self.id and tree is not None: + self.tree = Tree(repo, id=tree) + # END id to tree conversion + + def _set_cache_(self, attr): """ - Returns - First 7 bytes of the commit's sha id as an abbreviation of the full string. + Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. """ - return self.id[0:7] + if attr in self.__slots__: + temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + else: + super(Commit, self)._set_cache_(attr) @property def summary(self): diff --git a/lib/git/head.py b/lib/git/head.py index f4e94637..42dfd735 100644 --- a/lib/git/head.py +++ b/lib/git/head.py @@ -27,20 +27,6 @@ class Head(base.Ref): '1c09f116cbc2cb4100fb6935bb162daa4723f455' """ - def __init__(self, path, commit): - """ - Initialize a newly instanced Head - - ``path`` - is the path to the head ref, relative to the .git directory, i.e. 
- refs/heads/master - - `commit` - is the Commit object that the head points to - """ - super(Head, self).__init__(name, commit) - - @property def commit(self): """ diff --git a/lib/git/tag.py b/lib/git/tag.py index 4266a7a9..89060ee0 100644 --- a/lib/git/tag.py +++ b/lib/git/tag.py @@ -74,7 +74,7 @@ class TagObject(base.Object): type = "tag" __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - def __init__(self, repo, id, size=None, object=None, tag=None, + def __init__(self, repo, id, object=None, tag=None, tagger=None, tagged_date=None, message=None): """ Initialize a tag object with additional data @@ -85,9 +85,6 @@ class TagObject(base.Object): ``id`` SHA1 or ref suitable for git-rev-parse - ``size`` - Size of the object's data in bytes - ``object`` Object instance of object we are pointing to @@ -100,29 +97,30 @@ class TagObject(base.Object): ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) is the DateTime of the tag creation """ - super(TagObject, self).__init__(repo, id , size) - self.object = object - self.tag = tag - self.tagger = tagger - self.tagged_date = tagged_date - self.message = message - - def __bake__(self): - super(TagObject, self).__bake__() - - output = self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") - - obj, hexsha = lines[0].split(" ") # object - type_token, type_name = lines[1].split(" ") # type - self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) - - self.tag = lines[2][4:] # tag - - tagger_info = lines[3][7:]# tagger - self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + super(TagObject, self).__init__(repo, id ) + self._set_self_from_args_(locals()) - # line 4 empty - check git source to figure out purpose - self.message = "\n".join(lines[5:]) + def _set_cache_(self, attr): + """ + Cache all our attributes at once + """ + if attr in self.__slots__: + output = self.repo.git.cat_file(self.type,self.id) + lines = 
output.split("\n") + + obj, hexsha = lines[0].split(" ") # object + type_token, type_name = lines[1].split(" ") # type + self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag + + tagger_info = lines[3][7:]# tagger + self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + + # line 4 empty - check git source to figure out purpose + self.message = "\n".join(lines[5:]) + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) diff --git a/lib/git/tree.py b/lib/git/tree.py index 90f1b72d..db4a3e22 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -13,18 +13,19 @@ class Tree(base.IndexObject): type = "tree" __slots__ = "_contents" - def __init__(self, repo, id, mode=None, path=None, size=None): - super(Tree, self).__init__(repo, id, mode, path, size) - self._contents = None - - def __bake__(self): - # Read the tree contents. - super(Tree, self).__bake__() - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content_from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj + def __init__(self, repo, id, mode=None, path=None): + super(Tree, self).__init__(repo, id, mode, path) + + def _set_cache_(self, attr): + if attr == "_contents": + # Read the tree contents. 
+ self._contents = {} + for line in self.repo.git.ls_tree(self.id).splitlines(): + obj = self.content_from_string(self.repo, line) + if obj is not None: + self._contents[obj.path] = obj + else: + super(Tree, self)._set_cache_(attr) @staticmethod def content_from_string(repo, text): diff --git a/test/git/test_commit.py b/test/git/test_commit.py index e92c13dd..341d72e2 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -140,7 +140,6 @@ class TestCommit(object): git.return_value = fixture('diff_i') commit = Commit(self.repo, id='634396b2f541a9f2d58b00be1a07f0c358b999b3') - commit.__bake_it__() diffs = commit.diffs assert_equal(10, len(diffs)) @@ -178,7 +177,6 @@ class TestCommit(object): git.return_value = fixture('diff_mode_only') commit = Commit(self.repo, id='91169e1f5fa4de2eaea3f176461f5dc784796769') - commit.__bake_it__() diffs = commit.diffs # in case of mode-only changes, there is no blob @@ -196,7 +194,6 @@ class TestCommit(object): git.return_value = fixture('diff_tree_numstat_root') commit = Commit(self.repo, id='634396b2f541a9f2d58b00be1a07f0c358b999b3') - commit.__bake_it__() stats = commit.stats keys = stats.files.keys() diff --git a/test/git/test_tree.py b/test/git/test_tree.py index d52a8e0a..205fbf1a 100644 --- a/test/git/test_tree.py +++ b/test/git/test_tree.py @@ -43,7 +43,7 @@ class TestTree(object): assert_equal("aa94e396335d2957ca92606f909e53e7beaf3fbb", tree.id) assert_equal("100644", tree.mode) assert_equal("grit.rb", tree.path) - + def test_content_from_string_tree_should_return_commit(self): text = fixture('ls_tree_commit').split("\n")[1] -- cgit v1.2.1