author     Sebastian Thiel <byronimo@gmail.com>  2010-11-25 18:10:33 +0100
committer  Sebastian Thiel <byronimo@gmail.com>  2010-11-25 18:18:15 +0100
commit     f8ce24a835cae8c623e2936bec2618a8855c605b (patch)
tree       d4c1d392579e24285381613a4ac1b7cc2d6b6fae /git/objects
parent     65747a216c67c3101c6ae2edaa8119d786b793cb (diff)
parent     9004e3a1cf33110f2cbc458f1dc3259c930ad9b4 (diff)
-#######->WARNING<-####### Directory structure changed, see commit message
If you use git-python as a submodule of your own project and alter sys.path to import it, you will have to adjust your code to take the changed directory structure into consideration. Previously, you would put the path ./git-python/lib into your sys.path. All modules moved one level up into the 'git' subdirectory, which means that the 'git-python' directory now contains the 'git' root package. To allow the 'git' package to be found, add ./git-python to your path instead.

To finalize your update, run:

    git submodule update --init --recursive

As there will be left-over directories, consider running git-clean afterwards.
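A minimal sketch of the path adjustment described above, assuming your project keeps the submodule checked out at ./git-python relative to its root:

    import sys

    # before this change the package lived one level deeper:
    #   sys.path.insert(0, './git-python/lib')
    # after this change the 'git' root package sits directly in ./git-python:
    sys.path.insert(0, './git-python')

    import git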
Diffstat (limited to 'git/objects')
-rw-r--r--  git/objects/__init__.py              21
-rw-r--r--  git/objects/base.py                 172
-rw-r--r--  git/objects/blob.py                  27
-rw-r--r--  git/objects/commit.py               465
-rw-r--r--  git/objects/fun.py                  199
-rw-r--r--  git/objects/submodule/__init__.py     2
-rw-r--r--  git/objects/submodule/base.py       924
-rw-r--r--  git/objects/submodule/root.py       315
-rw-r--r--  git/objects/submodule/util.py       101
-rw-r--r--  git/objects/tag.py                   76
-rw-r--r--  git/objects/tree.py                 280
-rw-r--r--  git/objects/util.py                 315
12 files changed, 2897 insertions(+), 0 deletions(-)
diff --git a/git/objects/__init__.py b/git/objects/__init__.py
new file mode 100644
index 00000000..77f69d29
--- /dev/null
+++ b/git/objects/__init__.py
@@ -0,0 +1,21 @@
+"""
+Import all submodules' main classes into the package space
+"""
+import inspect
+from base import *
+# Fix import dependency - add IndexObject to the util module, so that it can be
+# imported by the submodule.base
+import submodule.util
+submodule.util.IndexObject = IndexObject
+submodule.util.Object = Object
+from submodule.base import *
+from submodule.root import *
+
+# must come after submodule was made available
+from tag import *
+from blob import *
+from commit import *
+from tree import *
+
+__all__ = [ name for name, obj in locals().items()
+				if not (name.startswith('_') or inspect.ismodule(obj)) ]
\ No newline at end of file
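As a quick illustration (a sketch, not part of the patch) of what the wildcard imports above expose at package level:

    from git.objects import Blob, Commit, Tree, Submodule, IndexObject

    # all of these are re-exported by the package __init__ shown above
    print(Commit.type, Blob.type, Tree.type)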
diff --git a/git/objects/base.py b/git/objects/base.py
new file mode 100644
index 00000000..5f2f7809
--- /dev/null
+++ b/git/objects/base.py
@@ -0,0 +1,172 @@
+# base.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.util import LazyMixin, join_path_native, stream_copy
+from util import get_object_type_by_name
+from gitdb.util import (
+ hex_to_bin,
+ bin_to_hex,
+ basename
+ )
+
+import gitdb.typ as dbtyp
+
+_assertion_msg_format = "Created object %r whose python type %r disagrees with the actual git object type %r"
+
+__all__ = ("Object", "IndexObject")
+
+class Object(LazyMixin):
+	"""Implements an Object which may be a Blob, Tree, Commit or Tag"""
+ NULL_HEX_SHA = '0'*40
+ NULL_BIN_SHA = '\0'*20
+
+ TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
+ __slots__ = ("repo", "binsha", "size" )
+ type = None # to be set by subclass
+
+ def __init__(self, repo, binsha):
+ """Initialize an object by identifying it by its binary sha.
+ All keyword arguments will be set on demand if None.
+
+ :param repo: repository this object is located in
+
+ :param binsha: 20 byte SHA1"""
+ super(Object,self).__init__()
+ self.repo = repo
+ self.binsha = binsha
+ assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
+
+ @classmethod
+ def new(cls, repo, id):
+ """
+ :return: New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a binsha even though
+ the input id may have been a Reference or Rev-Spec
+
+ :param id: reference, rev-spec, or hexsha
+
+ :note: This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a binsha."""
+ return repo.rev_parse(str(id))
+
+ @classmethod
+ def new_from_sha(cls, repo, sha1):
+ """
+ :return: new object instance of a type appropriate to represent the given
+ binary sha1
+ :param sha1: 20 byte binary sha1"""
+ if sha1 == cls.NULL_BIN_SHA:
+ # the NULL binsha is always the root commit
+ return get_object_type_by_name('commit')(repo, sha1)
+ #END handle special case
+ oinfo = repo.odb.info(sha1)
+ inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha)
+ inst.size = oinfo.size
+ return inst
+
+ def _set_cache_(self, attr):
+ """Retrieve object information"""
+ if attr == "size":
+ oinfo = self.repo.odb.info(self.binsha)
+ self.size = oinfo.size
+ # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
+ else:
+ super(Object,self)._set_cache_(attr)
+
+ def __eq__(self, other):
+ """:return: True if the objects have the same SHA1"""
+ return self.binsha == other.binsha
+
+ def __ne__(self, other):
+ """:return: True if the objects do not have the same SHA1 """
+ return self.binsha != other.binsha
+
+ def __hash__(self):
+ """:return: Hash of our id allowing objects to be used in dicts and sets"""
+ return hash(self.binsha)
+
+ def __str__(self):
+ """:return: string of our SHA1 as understood by all git commands"""
+ return bin_to_hex(self.binsha)
+
+ def __repr__(self):
+ """:return: string with pythonic representation of our object"""
+ return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
+
+ @property
+ def hexsha(self):
+ """:return: 40 byte hex version of our 20 byte binary sha"""
+ return bin_to_hex(self.binsha)
+
+ @property
+ def data_stream(self):
+ """ :return: File Object compatible stream to the uncompressed raw data of the object
+ :note: returned streams must be read in order"""
+ return self.repo.odb.stream(self.binsha)
+
+ def stream_data(self, ostream):
+ """Writes our data directly to the given output stream
+ :param ostream: File object compatible stream object.
+ :return: self"""
+ istream = self.repo.odb.stream(self.binsha)
+ stream_copy(istream, ostream)
+ return self
+
+
+class IndexObject(Object):
+	"""Base for all objects that can be part of the index file, namely Tree, Blob and
+	Submodule objects"""
+ __slots__ = ("path", "mode")
+
+	# for compatibility with iterable lists
+ _id_attribute_ = 'path'
+
+ def __init__(self, repo, binsha, mode=None, path=None):
+ """Initialize a newly instanced IndexObject
+ :param repo: is the Repo we are located in
+ :param binsha: 20 byte sha1
+ :param mode: is the stat compatible file mode as int, use the stat module
+			to evaluate the information
+ :param path:
+ is the path to the file in the file system, relative to the git repository root, i.e.
+ file.ext or folder/other.ext
+ :note:
+		Path may not be set if the index object has been created directly, as it cannot
+ be retrieved without knowing the parent tree."""
+ super(IndexObject, self).__init__(repo, binsha)
+ if mode is not None:
+ self.mode = mode
+ if path is not None:
+ self.path = path
+
+ def __hash__(self):
+ """:return:
+			Hash of our path as index items are uniquely identifiable by path, not
+			by their data!"""
+ return hash(self.path)
+
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+			# they cannot be retrieved later on (not without searching for them)
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+		# END handle slot attribute
+
+ @property
+ def name(self):
+ """:return: Name portion of the path, effectively being the basename"""
+ return basename(self.path)
+
+ @property
+ def abspath(self):
+ """
+ :return:
+			Absolute path to this index object in the file system (as opposed to the
+			.path field which is a path relative to the git repository).
+
+			The returned path will be native to the system and contains '\' on Windows."""
+ return join_path_native(self.repo.working_tree_dir, self.path)
+
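As a usage illustration (a sketch, not part of this patch), the Object/IndexObject interface defined above is normally reached through a Repo instance:

    from git import Repo

    repo = Repo('.')                      # any existing repository
    obj = repo.head.commit                # every object type shares this interface
    print(obj.hexsha)                     # 40-char hex form of the 20-byte binsha
    print(obj.size)                       # lazily read from the object database
    raw = obj.data_stream.read()          # uncompressed raw object data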
diff --git a/git/objects/blob.py b/git/objects/blob.py
new file mode 100644
index 00000000..32f8c61c
--- /dev/null
+++ b/git/objects/blob.py
@@ -0,0 +1,27 @@
+# blob.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from mimetypes import guess_type
+import base
+
+__all__ = ('Blob', )
+
+class Blob(base.IndexObject):
+ """A Blob encapsulates a git blob object"""
+ DEFAULT_MIME_TYPE = "text/plain"
+ type = "blob"
+
+ __slots__ = tuple()
+
+ @property
+ def mime_type(self):
+ """
+ :return: String describing the mime type of this file (based on the filename)
+ :note: Defaults to 'text/plain' in case the actual file type is unknown. """
+ guesses = None
+ if self.path:
+ guesses = guess_type(self.path)
+ return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
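A short sketch (not part of the patch) of reading a blob and its guessed mime type; the path 'setup.py' is only an assumed example:

    from git import Repo

    repo = Repo('.')
    blob = repo.head.commit.tree['setup.py']   # hypothetical path inside the tree
    print(blob.mime_type)                      # e.g. 'text/x-python', else DEFAULT_MIME_TYPE
    print(blob.data_stream.read()[:80])        # first bytes of the blob's content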
diff --git a/git/objects/commit.py b/git/objects/commit.py
new file mode 100644
index 00000000..69a3adc4
--- /dev/null
+++ b/git/objects/commit.py
@@ -0,0 +1,465 @@
+# commit.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from git.util import (
+ Actor,
+ Iterable,
+ Stats,
+ )
+from git.diff import Diffable
+from tree import Tree
+from gitdb import IStream
+from cStringIO import StringIO
+
+import base
+from gitdb.util import (
+ hex_to_bin
+ )
+from util import (
+ Traversable,
+ Serializable,
+ parse_date,
+ altz_to_utctz_str,
+ parse_actor_and_date
+ )
+from time import (
+ time,
+ altzone
+ )
+import os
+import sys
+
+__all__ = ('Commit', )
+
+class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
+ """Wraps a git Commit object.
+
+ This class will act lazily on some of its attributes and will query the
+ value on demand only if it involves calling the git binary."""
+
+ # ENVIRONMENT VARIABLES
+ # read when creating new commits
+ env_author_date = "GIT_AUTHOR_DATE"
+ env_committer_date = "GIT_COMMITTER_DATE"
+
+ # CONFIGURATION KEYS
+ conf_encoding = 'i18n.commitencoding'
+
+ # INVARIANTS
+ default_encoding = "UTF-8"
+
+
+ # object configuration
+ type = "commit"
+ __slots__ = ("tree",
+ "author", "authored_date", "author_tz_offset",
+ "committer", "committed_date", "committer_tz_offset",
+ "message", "parents", "encoding")
+ _id_attribute_ = "binsha"
+
+ def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ committer=None, committed_date=None, committer_tz_offset=None,
+ message=None, parents=None, encoding=None):
+ """Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set on first query.
+
+ :param binsha: 20 byte sha1
+ :param parents: tuple( Commit, ... )
+ is a tuple of commit ids or actual Commits
+ :param tree: Tree
+ Tree object
+ :param author: Actor
+ is the author string ( will be implicitly converted into an Actor object )
+ :param authored_date: int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
+ :param author_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param committer: Actor
+ is the committer string
+ :param committed_date: int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
+ :param committer_tz_offset: int_seconds_west_of_utc
+			is the timezone that the committed_date is in
+ :param message: string
+ is the commit message
+ :param encoding: string
+ encoding of the message, defaults to UTF-8
+ :param parents:
+ List or tuple of Commit objects which are our parent(s) in the commit
+ dependency graph
+ :return: git.Commit
+
+ :note: Timezone information is in the same format and in the same sign
+ as what time.altzone returns. The sign is inverted compared to git's
+ UTC timezone."""
+ super(Commit,self).__init__(repo, binsha)
+ if tree is not None:
+ assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
+ if tree is not None:
+ self.tree = tree
+ if author is not None:
+ self.author = author
+ if authored_date is not None:
+ self.authored_date = authored_date
+ if author_tz_offset is not None:
+ self.author_tz_offset = author_tz_offset
+ if committer is not None:
+ self.committer = committer
+ if committed_date is not None:
+ self.committed_date = committed_date
+ if committer_tz_offset is not None:
+ self.committer_tz_offset = committer_tz_offset
+ if message is not None:
+ self.message = message
+ if parents is not None:
+ self.parents = parents
+ if encoding is not None:
+ self.encoding = encoding
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents
+
+ def _set_cache_(self, attr):
+ if attr in Commit.__slots__:
+			# read the data in a chunk, it's faster - then provide a file wrapper
+ binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
+ self._deserialize(StringIO(stream.read()))
+ else:
+ super(Commit, self)._set_cache_(attr)
+ # END handle attrs
+
+ @property
+ def summary(self):
+ """:return: First line of the commit message"""
+ return self.message.split('\n', 1)[0]
+
+ def count(self, paths='', **kwargs):
+ """Count the number of commits reachable from this commit
+
+ :param paths:
+			is an optional path or a list of paths restricting the return value
+ to commits actually containing the paths
+
+ :param kwargs:
+ Additional options to be passed to git-rev-list. They must not alter
+			the output style of the command, or parsing will yield incorrect results
+ :return: int defining the number of reachable commits"""
+ # yes, it makes a difference whether empty paths are given or not in our case
+ # as the empty paths version will ignore merge commits for some reason.
+ if paths:
+ return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines())
+ else:
+ return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines())
+
+
+ @property
+ def name_rev(self):
+ """
+ :return:
+			String describing the commit's hex sha based on the closest Reference.
+ Mostly useful for UI purposes"""
+ return self.repo.git.name_rev(self)
+
+ @classmethod
+ def iter_items(cls, repo, rev, paths='', **kwargs):
+ """Find all commits matching the given criteria.
+
+ :param repo: is the Repo
+ :param rev: revision specifier, see git-rev-parse for viable options
+ :param paths:
+			is an optional path or list of paths, if set only Commits that include the path
+ or paths will be considered
+ :param kwargs:
+ optional keyword arguments to git rev-list where
+ ``max_count`` is the maximum number of commits to fetch
+ ``skip`` is the number of commits to skip
+ ``since`` all commits since i.e. '1970-01-01'
+ :return: iterator yielding Commit items"""
+ if 'pretty' in kwargs:
+ raise ValueError("--pretty cannot be used as parsing expects single sha's only")
+ # END handle pretty
+ args = list()
+ if paths:
+ args.extend(('--', paths))
+ # END if paths
+
+ proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
+ return cls._iter_from_process_or_stream(repo, proc)
+
+ def iter_parents(self, paths='', **kwargs):
+ """Iterate _all_ parents of this commit.
+
+ :param paths:
+ Optional path or list of paths limiting the Commits to those that
+ contain at least one of the paths
+ :param kwargs: All arguments allowed by git-rev-list
+ :return: Iterator yielding Commit objects which are parents of self """
+ # skip ourselves
+ skip = kwargs.get("skip", 1)
+ if skip == 0: # skip ourselves
+ skip = 1
+ kwargs['skip'] = skip
+
+ return self.iter_items(self.repo, self, paths, **kwargs)
+
+ @property
+ def stats(self):
+ """Create a git stat from changes between this commit and its first parent
+ or from all changes done if this is the very first commit.
+
+ :return: git.Stats"""
+ if not self.parents:
+ text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True)
+ text2 = ""
+ for line in text.splitlines()[1:]:
+ (insertions, deletions, filename) = line.split("\t")
+ text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
+ text = text2
+ else:
+ text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
+ return Stats._list_from_string(self.repo, text)
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
+ """Parse out commit information into a list of Commit objects
+		We expect one line per commit, and parse the actual commit information directly
+		from our lightning fast object database
+
+ :param proc: git-rev-list process instance - one sha per line
+ :return: iterator returning Commit objects"""
+ stream = proc_or_stream
+ if not hasattr(stream,'readline'):
+ stream = proc_or_stream.stdout
+
+ readline = stream.readline
+ while True:
+ line = readline()
+ if not line:
+ break
+ hexsha = line.strip()
+ if len(hexsha) > 40:
+ # split additional information, as returned by bisect for instance
+ hexsha, rest = line.split(None, 1)
+ # END handle extra info
+
+ assert len(hexsha) == 40, "Invalid line: %s" % hexsha
+ yield Commit(repo, hex_to_bin(hexsha))
+ # END for each line in stream
+
+
+ @classmethod
+ def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
+ """Commit the given tree, creating a commit object.
+
+ :param repo: Repo object the commit should be part of
+ :param tree: Tree object or hex or bin sha
+ the tree of the new commit
+ :param message: Commit message. It may be an empty string if no message is provided.
+ It will be converted to a string in any case.
+ :param parent_commits:
+ Optional Commit objects to use as parents for the new commit.
+ If empty list, the commit will have no parents at all and become
+ a root commit.
+ If None , the current head commit will be the parent of the
+ new commit object
+ :param head:
+ If True, the HEAD will be advanced to the new commit automatically.
+ Else the HEAD will remain pointing on the previous commit. This could
+ lead to undesired results when diffing files.
+
+ :return: Commit object representing the new commit
+
+ :note:
+ Additional information about the committer and Author are taken from the
+ environment or from the git configuration, see git-commit-tree for
+ more information"""
+ parents = parent_commits
+ if parent_commits is None:
+ try:
+ parent_commits = [ repo.head.commit ]
+ except ValueError:
+ # empty repositories have no head commit
+ parent_commits = list()
+ # END handle parent commits
+ # END if parent commits are unset
+
+ # retrieve all additional information, create a commit object, and
+ # serialize it
+ # Generally:
+ # * Environment variables override configuration values
+ # * Sensible defaults are set according to the git documentation
+
+		# COMMITTER AND AUTHOR INFO
+ cr = repo.config_reader()
+ env = os.environ
+
+ committer = Actor.committer(cr)
+ author = Actor.author(cr)
+
+ # PARSE THE DATES
+ unix_time = int(time())
+ offset = altzone
+
+ author_date_str = env.get(cls.env_author_date, '')
+ if author_date_str:
+ author_time, author_offset = parse_date(author_date_str)
+ else:
+ author_time, author_offset = unix_time, offset
+ # END set author time
+
+ committer_date_str = env.get(cls.env_committer_date, '')
+ if committer_date_str:
+ committer_time, committer_offset = parse_date(committer_date_str)
+ else:
+ committer_time, committer_offset = unix_time, offset
+ # END set committer time
+
+ # assume utf8 encoding
+ enc_section, enc_option = cls.conf_encoding.split('.')
+ conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
+
+
+ # if the tree is no object, make sure we create one - otherwise
+ # the created commit object is invalid
+ if isinstance(tree, str):
+ tree = repo.tree(tree)
+ # END tree conversion
+
+ # CREATE NEW COMMIT
+ new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
+ author, author_time, author_offset,
+ committer, committer_time, committer_offset,
+ message, parent_commits, conf_encoding)
+
+ stream = StringIO()
+ new_commit._serialize(stream)
+ streamlen = stream.tell()
+ stream.seek(0)
+
+ istream = repo.odb.store(IStream(cls.type, streamlen, stream))
+ new_commit.binsha = istream.binsha
+
+ if head:
+ # need late import here, importing git at the very beginning throws
+ # as well ...
+ import git.refs
+ try:
+ repo.head.set_commit(new_commit, logmsg="commit: %s" % message)
+ except ValueError:
+ # head is not yet set to the ref our HEAD points to
+ # Happens on first commit
+ import git.refs
+ master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit, logmsg="commit (initial): %s" % message)
+ repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)
+ # END handle empty repositories
+ # END advance head handling
+
+ return new_commit
+
+ #{ Serializable Implementation
+
+ def _serialize(self, stream):
+ write = stream.write
+ write("tree %s\n" % self.tree)
+ for p in self.parents:
+ write("parent %s\n" % p)
+
+ a = self.author
+ aname = a.name
+ if isinstance(aname, unicode):
+ aname = aname.encode(self.encoding)
+ # END handle unicode in name
+
+ c = self.committer
+ fmt = "%s %s <%s> %s %s\n"
+ write(fmt % ("author", aname, a.email,
+ self.authored_date,
+ altz_to_utctz_str(self.author_tz_offset)))
+
+ # encode committer
+ aname = c.name
+ if isinstance(aname, unicode):
+ aname = aname.encode(self.encoding)
+ # END handle unicode in name
+ write(fmt % ("committer", aname, c.email,
+ self.committed_date,
+ altz_to_utctz_str(self.committer_tz_offset)))
+
+ if self.encoding != self.default_encoding:
+ write("encoding %s\n" % self.encoding)
+
+ write("\n")
+
+ # write plain bytes, be sure its encoded according to our encoding
+ if isinstance(self.message, unicode):
+ write(self.message.encode(self.encoding))
+ else:
+ write(self.message)
+ # END handle encoding
+ return self
+
+ def _deserialize(self, stream):
+		""":param stream: readable file-like object carrying the serialized commit data
+		as stored in our object database"""
+ readline = stream.readline
+ self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
+
+ self.parents = list()
+ next_line = None
+ while True:
+ parent_line = readline()
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
+ # END for each parent line
+ self.parents = tuple(self.parents)
+
+ self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
+
+
+ # now we can have the encoding line, or an empty line followed by the optional
+ # message.
+ self.encoding = self.default_encoding
+ # read encoding or empty line to separate message
+ enc = readline()
+ enc = enc.strip()
+ if enc:
+ self.encoding = enc[enc.find(' ')+1:]
+ # now comes the message separator
+ readline()
+ # END handle encoding
+
+ # decode the authors name
+ try:
+ self.author.name = self.author.name.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
+ # END handle author's encoding
+
+ # decode committer name
+ try:
+ self.committer.name = self.committer.name.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
+		# END handle committer's encoding
+
+ # a stream from our data simply gives us the plain message
+ # The end of our message stream is marked with a newline that we strip
+ self.message = stream.read()
+ try:
+ self.message = self.message.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
+ # END exception handling
+ return self
+
+ #} END serializable implementation
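A usage sketch (not part of the patch) for the Commit API defined above, assuming a repository with at least one commit; Repo.iter_commits delegates to Commit.iter_items:

    from git import Repo

    repo = Repo('.')
    # walk the five most recent commits reachable from HEAD
    for commit in repo.iter_commits('HEAD', max_count=5):
        print(commit.hexsha, commit.summary)
        print(commit.author.name, commit.authored_date, commit.author_tz_offset)

    # number of commits reachable from HEAD that touch a given path
    print(repo.head.commit.count(paths='git/objects'))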
diff --git a/git/objects/fun.py b/git/objects/fun.py
new file mode 100644
index 00000000..9b0a377c
--- /dev/null
+++ b/git/objects/fun.py
@@ -0,0 +1,199 @@
+"""Module with functions which are supposed to be as fast as possible"""
+from stat import S_ISDIR
+
+__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
+ 'traverse_tree_recursive')
+
+
+
+
+def tree_to_stream(entries, write):
+	"""Write the given list of entries into a stream using its write method
+ :param entries: **sorted** list of tuples with (binsha, mode, name)
+ :param write: write method which takes a data string"""
+ ord_zero = ord('0')
+ bit_mask = 7 # 3 bits set
+
+ for binsha, mode, name in entries:
+ mode_str = ''
+ for i in xrange(6):
+ mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
+		# END for each octal digit
+
+		# git slices away the first octal if it's zero
+ if mode_str[0] == '0':
+ mode_str = mode_str[1:]
+ # END save a byte
+
+ # here it comes: if the name is actually unicode, the replacement below
+ # will not work as the binsha is not part of the ascii unicode encoding -
+		# hence we must convert to a utf8 string for it to work properly.
+ # According to my tests, this is exactly what git does, that is it just
+ # takes the input literally, which appears to be utf8 on linux.
+ if isinstance(name, unicode):
+ name = name.encode("utf8")
+ write("%s %s\0%s" % (mode_str, name, binsha))
+ # END for each item
+
+
+def tree_entries_from_data(data):
+ """Reads the binary representation of a tree and returns tuples of Tree items
+ :param data: data block with tree data
+ :return: list(tuple(binsha, mode, tree_relative_path), ...)"""
+ ord_zero = ord('0')
+ len_data = len(data)
+ i = 0
+ out = list()
+ while i < len_data:
+ mode = 0
+
+ # read mode
+ # Some git versions truncate the leading 0, some don't
+ # The type will be extracted from the mode later
+ while data[i] != ' ':
+ # move existing mode integer up one level being 3 bits
+ # and add the actual ordinal value of the character
+ mode = (mode << 3) + (ord(data[i]) - ord_zero)
+ i += 1
+ # END while reading mode
+
+ # byte is space now, skip it
+ i += 1
+
+ # parse name, it is NULL separated
+
+ ns = i
+ while data[i] != '\0':
+ i += 1
+ # END while not reached NULL
+
+ # default encoding for strings in git is utf8
+ # Only use the respective unicode object if the byte stream was encoded
+ name = data[ns:i]
+ name_enc = name.decode("utf-8")
+ if len(name) > len(name_enc):
+ name = name_enc
+ # END handle encoding
+
+ # byte is NULL, get next 20
+ i += 1
+ sha = data[i:i+20]
+ i = i + 20
+ out.append((sha, mode, name))
+ # END for each byte in data stream
+ return out
+
+
+def _find_by_name(tree_data, name, is_dir, start_at):
+ """return data entry matching the given name and tree mode
+ or None.
+ Before the item is returned, the respective data item is set
+ None in the tree_data list to mark it done"""
+ try:
+ item = tree_data[start_at]
+ if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
+ tree_data[start_at] = None
+ return item
+ except IndexError:
+ pass
+ # END exception handling
+ for index, item in enumerate(tree_data):
+ if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
+ tree_data[index] = None
+ return item
+ # END if item matches
+ # END for each item
+ return None
+
+def _to_full_path(item, path_prefix):
+ """Rebuild entry with given path prefix"""
+ if not item:
+ return item
+ return (item[0], item[1], path_prefix+item[2])
+
+def traverse_trees_recursive(odb, tree_shas, path_prefix):
+ """
+ :return: list with entries according to the given binary tree-shas.
+ The result is encoded in a list
+ of n tuple|None per blob/commit, (n == len(tree_shas)), where
+ * [0] == 20 byte sha
+ * [1] == mode as int
+ * [2] == path relative to working tree root
+ The entry tuple is None if the respective blob/commit did not
+ exist in the given tree.
+ :param tree_shas: iterable of shas pointing to trees. All trees must
+		be on the same level. A tree-sha may be None, in which case None entries
+		will be returned at its position in each output tuple
+ :param path_prefix: a prefix to be added to the returned paths on this level,
+ set it '' for the first iteration
+ :note: The ordering of the returned items will be partially lost"""
+ trees_data = list()
+ nt = len(tree_shas)
+ for tree_sha in tree_shas:
+ if tree_sha is None:
+ data = list()
+ else:
+ data = tree_entries_from_data(odb.stream(tree_sha).read())
+ # END handle muted trees
+ trees_data.append(data)
+ # END for each sha to get data for
+
+ out = list()
+ out_append = out.append
+
+ # find all matching entries and recursively process them together if the match
+ # is a tree. If the match is a non-tree item, put it into the result.
+ # Processed items will be set None
+ for ti, tree_data in enumerate(trees_data):
+ for ii, item in enumerate(tree_data):
+ if not item:
+ continue
+ # END skip already done items
+ entries = [ None for n in range(nt) ]
+ entries[ti] = item
+			sha, mode, name = item	# it's faster to unpack
+ is_dir = S_ISDIR(mode) # type mode bits
+
+ # find this item in all other tree data items
+ # wrap around, but stop one before our current index, hence
+ # ti+nt, not ti+1+nt
+ for tio in range(ti+1, ti+nt):
+ tio = tio % nt
+ entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
+ # END for each other item data
+
+ # if we are a directory, enter recursion
+ if is_dir:
+ out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/'))
+ else:
+ out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
+ # END handle recursion
+
+ # finally mark it done
+ tree_data[ii] = None
+ # END for each item
+
+ # we are done with one tree, set all its data empty
+ del(tree_data[:])
+ # END for each tree_data chunk
+ return out
+
+def traverse_tree_recursive(odb, tree_sha, path_prefix):
+ """
+ :return: list of entries of the tree pointed to by the binary tree_sha. An entry
+ has the following format:
+ * [0] 20 byte sha
+ * [1] mode as int
+ * [2] path relative to the repository
+ :param path_prefix: prefix to prepend to the front of all returned paths"""
+ entries = list()
+ data = tree_entries_from_data(odb.stream(tree_sha).read())
+
+ # unpacking/packing is faster than accessing individual items
+ for sha, mode, name in data:
+ if S_ISDIR(mode):
+ entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
+ else:
+ entries.append((sha, mode, path_prefix+name))
+ # END for each item
+
+ return entries
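A sketch (not part of the patch) of driving the traversal helpers above directly, assuming repo.odb is the object database they expect:

    from git import Repo
    from git.objects.fun import traverse_tree_recursive

    repo = Repo('.')
    root = repo.head.commit.tree
    # yields (binsha, mode, path) for every non-tree entry reachable from the root tree
    for binsha, mode, path in traverse_tree_recursive(repo.odb, root.binsha, ''):
        print(oct(mode), path)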
diff --git a/git/objects/submodule/__init__.py b/git/objects/submodule/__init__.py
new file mode 100644
index 00000000..82df59b0
--- /dev/null
+++ b/git/objects/submodule/__init__.py
@@ -0,0 +1,2 @@
+# NOTE: Cannot import anything here as the top-level __init__ has to handle
+# our dependencies
diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py
new file mode 100644
index 00000000..fd6c9396
--- /dev/null
+++ b/git/objects/submodule/base.py
@@ -0,0 +1,924 @@
+import util
+from util import (
+ mkhead,
+ sm_name,
+ sm_section,
+ unbare_repo,
+ SubmoduleConfigParser,
+ find_first_remote_branch
+ )
+from git.objects.util import Traversable
+from StringIO import StringIO # need a dict to set bloody .name field
+from git.util import (
+ Iterable,
+ join_path_native,
+ to_native_path_linux,
+ RemoteProgress
+ )
+
+from git.config import SectionConstraint
+from git.exc import (
+ InvalidGitRepositoryError,
+ NoSuchPathError
+ )
+
+import stat
+import git
+
+import os
+import sys
+import time
+
+import shutil
+
+__all__ = ["Submodule", "UpdateProgress"]
+
+
+class UpdateProgress(RemoteProgress):
+ """Class providing detailed progress information to the caller who should
+ derive from it and implement the ``update(...)`` message"""
+ CLONE, FETCH, UPDWKTREE = [1 << x for x in range(RemoteProgress._num_op_codes, RemoteProgress._num_op_codes+3)]
+ _num_op_codes = RemoteProgress._num_op_codes + 3
+
+ __slots__ = tuple()
+
+
+BEGIN = UpdateProgress.BEGIN
+END = UpdateProgress.END
+CLONE = UpdateProgress.CLONE
+FETCH = UpdateProgress.FETCH
+UPDWKTREE = UpdateProgress.UPDWKTREE
+
+
+# IndexObject comes via the util module; it's a 'hacky' fix thanks to python's import
+# mechanism, which causes plenty of trouble if the only reason for packages and
+# modules is refactoring - subpackages shouldn't depend on parent packages
+class Submodule(util.IndexObject, Iterable, Traversable):
+ """Implements access to a git submodule. They are special in that their sha
+ represents a commit in the submodule's repository which is to be checked out
+ at the path of this instance.
+ The submodule type does not have a string type associated with it, as it exists
+ solely as a marker in the tree and index.
+
+ All methods work in bare and non-bare repositories."""
+
+ _id_attribute_ = "name"
+ k_modules_file = '.gitmodules'
+ k_head_option = 'branch'
+ k_head_default = 'master'
+ k_default_mode = stat.S_IFDIR | stat.S_IFLNK # submodules are directories with link-status
+
+	# this is a bogus type for base class compatibility
+ type = 'submodule'
+
+ __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__')
+ _cache_attrs = ('path', '_url', '_branch_path')
+
+ def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, branch_path=None):
+ """Initialize this instance with its attributes. We only document the ones
+ that differ from ``IndexObject``
+
+ :param repo: Our parent repository
+ :param binsha: binary sha referring to a commit in the remote repository, see url parameter
+ :param parent_commit: see set_parent_commit()
+ :param url: The url to the remote repository which is the submodule
+ :param branch_path: full (relative) path to ref to checkout when cloning the remote repository"""
+ super(Submodule, self).__init__(repo, binsha, mode, path)
+ self.size = 0
+ if parent_commit is not None:
+ self._parent_commit = parent_commit
+ if url is not None:
+ self._url = url
+ if branch_path is not None:
+ assert isinstance(branch_path, basestring)
+ self._branch_path = branch_path
+ if name is not None:
+ self._name = name
+
+ def _set_cache_(self, attr):
+ if attr == '_parent_commit':
+ # set a default value, which is the root tree of the current head
+ self._parent_commit = self.repo.commit()
+ elif attr in ('path', '_url', '_branch_path'):
+ reader = self.config_reader()
+ # default submodule values
+ self.path = reader.get_value('path')
+ self._url = reader.get_value('url')
+ # git-python extension values - optional
+ self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default))
+ elif attr == '_name':
+ raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially")
+ else:
+ super(Submodule, self)._set_cache_(attr)
+ # END handle attribute name
+
+ def _get_intermediate_items(self, item):
+ """:return: all the submodules of our module repository"""
+ try:
+ return type(self).list_items(item.module())
+ except InvalidGitRepositoryError:
+ return list()
+		# END handle intermediate items
+
+ def __eq__(self, other):
+ """Compare with another submodule"""
+ # we may only compare by name as this should be the ID they are hashed with
+ # Otherwise this type wouldn't be hashable
+ # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other)
+ return self._name == other._name
+
+ def __ne__(self, other):
+ """Compare with another submodule for inequality"""
+ return not (self == other)
+
+ def __hash__(self):
+ """Hash this instance using its logical id, not the sha"""
+ return hash(self._name)
+
+ def __str__(self):
+ return self._name
+
+ def __repr__(self):
+ return "git.%s(name=%s, path=%s, url=%s, branch_path=%s)" % (type(self).__name__, self._name, self.path, self.url, self.branch_path)
+
+ @classmethod
+ def _config_parser(cls, repo, parent_commit, read_only):
+ """:return: Config Parser constrained to our submodule in read or write mode
+ :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository
+ at the given parent commit. Otherwise the exception would be delayed until the first
+ access of the config parser"""
+ parent_matches_head = repo.head.commit == parent_commit
+ if not repo.bare and parent_matches_head:
+ fp_module = cls.k_modules_file
+ fp_module_path = os.path.join(repo.working_tree_dir, fp_module)
+ if not os.path.isfile(fp_module_path):
+ raise IOError("%s file was not accessible" % fp_module_path)
+			# END handle existence
+ fp_module = fp_module_path
+ else:
+ try:
+ fp_module = cls._sio_modules(parent_commit)
+ except KeyError:
+ raise IOError("Could not find %s file in the tree of parent commit %s" % (cls.k_modules_file, parent_commit))
+ # END handle exceptions
+ # END handle non-bare working tree
+
+ if not read_only and (repo.bare or not parent_matches_head):
+ raise ValueError("Cannot write blobs of 'historical' submodule configurations")
+ # END handle writes of historical submodules
+
+ return SubmoduleConfigParser(fp_module, read_only = read_only)
+
+ def _clear_cache(self):
+ # clear the possibly changed values
+ for name in self._cache_attrs:
+ try:
+ delattr(self, name)
+ except AttributeError:
+ pass
+ # END try attr deletion
+ # END for each name to delete
+
+ @classmethod
+ def _sio_modules(cls, parent_commit):
+ """:return: Configuration file as StringIO - we only access it through the respective blob's data"""
+ sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read())
+ sio.name = cls.k_modules_file
+ return sio
+
+ def _config_parser_constrained(self, read_only):
+ """:return: Config Parser constrained to our submodule in read or write mode"""
+ parser = self._config_parser(self.repo, self._parent_commit, read_only)
+ parser.set_submodule(self)
+ return SectionConstraint(parser, sm_section(self.name))
+
+ #{ Edit Interface
+
+ @classmethod
+ def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
+ """Add a new submodule to the given repository. This will alter the index
+ as well as the .gitmodules file, but will not create a new commit.
+ If the submodule already exists, no matter if the configuration differs
+ from the one provided, the existing submodule will be returned.
+
+ :param repo: Repository instance which should receive the submodule
+ :param name: The name/identifier for the submodule
+ :param path: repository-relative or absolute path at which the submodule
+ should be located
+ It will be created as required during the repository initialization.
+ :param url: git-clone compatible URL, see git-clone reference for more information
+ If None, the repository is assumed to exist, and the url of the first
+ remote is taken instead. This is useful if you want to make an existing
+			repository a submodule of another one.
+ :param branch: branch at which the submodule should (later) be checked out.
+ The given branch must exist in the remote repository, and will be checked
+ out locally as a tracking branch.
+			It will only be written into the configuration if it is not None; otherwise
+			the checked out branch will be the one the remote HEAD pointed to.
+			The result you get in these situations is somewhat fuzzy, and it is recommended
+			to specify at least 'master' here.
+ :param no_checkout: if True, and if the repository has to be cloned manually,
+ no checkout will be performed
+ :return: The newly created submodule instance
+ :note: works atomically, such that no change will be done if the repository
+ update fails for instance"""
+ if repo.bare:
+ raise InvalidGitRepositoryError("Cannot add submodules to bare repositories")
+ # END handle bare repos
+
+ path = to_native_path_linux(path)
+ if path.endswith('/'):
+ path = path[:-1]
+ # END handle trailing slash
+
+ # assure we never put backslashes into the url, as some operating systems
+ # like it ...
+ if url != None:
+ url = to_native_path_linux(url)
+ #END assure url correctness
+
+ # INSTANTIATE INTERMEDIATE SM
+ sm = cls(repo, cls.NULL_BIN_SHA, cls.k_default_mode, path, name)
+ if sm.exists():
+ # reretrieve submodule from tree
+ try:
+ return repo.head.commit.tree[path]
+ except KeyError:
+ # could only be in index
+ index = repo.index
+ entry = index.entries[index.entry_key(path, 0)]
+ sm.binsha = entry.binsha
+ return sm
+ # END handle exceptions
+ # END handle existing
+
+ br = git.Head.to_full_path(str(branch) or cls.k_head_default)
+ has_module = sm.module_exists()
+ branch_is_default = branch is None
+ if has_module and url is not None:
+ if url not in [r.url for r in sm.module().remotes]:
+ raise ValueError("Specified URL '%s' does not match any remote url of the repository at '%s'" % (url, sm.abspath))
+ # END check url
+ # END verify urls match
+
+ mrepo = None
+ if url is None:
+ if not has_module:
+				raise ValueError("A URL was not given and existing repository did not exist at %s" % path)
+ # END check url
+ mrepo = sm.module()
+ urls = [r.url for r in mrepo.remotes]
+ if not urls:
+ raise ValueError("Didn't find any remote url in repository at %s" % sm.abspath)
+ # END verify we have url
+ url = urls[0]
+ else:
+ # clone new repo
+ kwargs = {'n' : no_checkout}
+ if not branch_is_default:
+ kwargs['b'] = br
+ # END setup checkout-branch
+ mrepo = git.Repo.clone_from(url, path, **kwargs)
+ # END verify url
+
+ # update configuration and index
+ index = sm.repo.index
+ writer = sm.config_writer(index=index, write=False)
+ writer.set_value('url', url)
+ writer.set_value('path', path)
+
+ sm._url = url
+ if not branch_is_default:
+ # store full path
+ writer.set_value(cls.k_head_option, br)
+ sm._branch_path = br
+ # END handle path
+ del(writer)
+
+		# we deliberately assume that our head matches our index!
+ pcommit = repo.head.commit
+ sm._parent_commit = pcommit
+ sm.binsha = mrepo.head.commit.binsha
+ index.add([sm], write=True)
+
+ return sm
+
+ def update(self, recursive=False, init=True, to_latest_revision=False, progress=None,
+ dry_run=False):
+ """Update the repository of this submodule to point to the checkout
+ we point at with the binsha of this instance.
+
+ :param recursive: if True, we will operate recursively and update child-
+ modules as well.
+ :param init: if True, the module repository will be cloned into place if necessary
+ :param to_latest_revision: if True, the submodule's sha will be ignored during checkout.
+ Instead, the remote will be fetched, and the local tracking branch updated.
+ This only works if we have a local tracking branch, which is the case
+ if the remote repository had a master branch, or of the 'branch' option
+ was specified for this submodule and the branch existed remotely
+		:param progress: UpdateProgress instance or None if no progress should be shown
+ :param dry_run: if True, the operation will only be simulated, but not performed.
+ All performed operations are read-only
+ :note: does nothing in bare repositories
+		:note: method is definitely not atomic if recursive is True
+ :return: self"""
+ if self.repo.bare:
+ return self
+ #END pass in bare mode
+
+ if progress is None:
+ progress = UpdateProgress()
+ #END handle progress
+ prefix = ''
+ if dry_run:
+ prefix = "DRY-RUN: "
+ #END handle prefix
+
+ # to keep things plausible in dry-run mode
+ if dry_run:
+ mrepo = None
+ #END init mrepo
+
+ # ASSURE REPO IS PRESENT AND UPTODATE
+ #####################################
+ try:
+ mrepo = self.module()
+ rmts = mrepo.remotes
+ len_rmts = len(rmts)
+ for i, remote in enumerate(rmts):
+ op = FETCH
+ if i == 0:
+ op |= BEGIN
+ #END handle start
+
+ progress.update(op, i, len_rmts, prefix+"Fetching remote %s of submodule %r" % (remote, self.name))
+ #===============================
+ if not dry_run:
+ remote.fetch(progress=progress)
+ #END handle dry-run
+ #===============================
+ if i == len_rmts-1:
+ op |= END
+ #END handle end
+ progress.update(op, i, len_rmts, prefix+"Done fetching remote of submodule %r" % self.name)
+ #END fetch new data
+ except InvalidGitRepositoryError:
+ if not init:
+ return self
+ # END early abort if init is not allowed
+ import git
+
+ # there is no git-repository yet - but delete empty paths
+ module_path = join_path_native(self.repo.working_tree_dir, self.path)
+ if not dry_run and os.path.isdir(module_path):
+ try:
+ os.rmdir(module_path)
+ except OSError:
+					raise OSError("Module directory at %r already exists and is non-empty" % module_path)
+ # END handle OSError
+ # END handle directory removal
+
+ # don't check it out at first - nonetheless it will create a local
+ # branch according to the remote-HEAD if possible
+ progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name))
+ if not dry_run:
+ mrepo = git.Repo.clone_from(self.url, module_path, n=True)
+ #END handle dry-run
+ progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path)
+
+
+ if not dry_run:
+ # see whether we have a valid branch to checkout
+ try:
+ # find a remote which has our branch - we try to be flexible
+ remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name)
+ local_branch = mkhead(mrepo, self.branch_path)
+
+ # have a valid branch, but no checkout - make sure we can figure
+ # that out by marking the commit with a null_sha
+ local_branch.set_object(util.Object(mrepo, self.NULL_BIN_SHA))
+ # END initial checkout + branch creation
+
+ # make sure HEAD is not detached
+ mrepo.head.set_reference(local_branch, logmsg="submodule: attaching head to %s" % local_branch)
+ mrepo.head.ref.set_tracking_branch(remote_branch)
+ except IndexError:
+ print >> sys.stderr, "Warning: Failed to checkout tracking branch %s" % self.branch_path
+ #END handle tracking branch
+
+ # NOTE: Have to write the repo config file as well, otherwise
+ # the default implementation will be offended and not update the repository
+ # Maybe this is a good way to assure it doesn't get into our way, but
+				# we want to stay backwards compatible too ... It's so redundant!
+ self.repo.config_writer().set_value(sm_section(self.name), 'url', self.url)
+ #END handle dry_run
+			#END handle initialization
+
+
+ # DETERMINE SHAS TO CHECKOUT
+ ############################
+ binsha = self.binsha
+ hexsha = self.hexsha
+ if mrepo is not None:
+ # mrepo is only set if we are not in dry-run mode or if the module existed
+ is_detached = mrepo.head.is_detached
+ #END handle dry_run
+
+ if mrepo is not None and to_latest_revision:
+ msg_base = "Cannot update to latest revision in repository at %r as " % mrepo.working_dir
+ if not is_detached:
+ rref = mrepo.head.ref.tracking_branch()
+ if rref is not None:
+ rcommit = rref.commit
+ binsha = rcommit.binsha
+ hexsha = rcommit.hexsha
+ else:
+ print >> sys.stderr, "%s a tracking branch was not set for local branch '%s'" % (msg_base, mrepo.head.ref)
+ # END handle remote ref
+ else:
+ print >> sys.stderr, "%s there was no local tracking branch" % msg_base
+ # END handle detached head
+ # END handle to_latest_revision option
+
+ # update the working tree
+ # handles dry_run
+ if mrepo is not None and mrepo.head.commit.binsha != binsha:
+ progress.update(BEGIN|UPDWKTREE, 0, 1, prefix+"Updating working tree at %s for submodule %r to revision %s" % (self.path, self.name, hexsha))
+ if not dry_run:
+ if is_detached:
+					# NOTE: for now we force, the user is not supposed to change detached
+ # submodules anyway. Maybe at some point this becomes an option, to
+ # properly handle user modifications - see below for future options
+ # regarding rebase and merge.
+ mrepo.git.checkout(hexsha, force=True)
+ else:
+ # TODO: allow to specify a rebase, merge, or reset
+ # TODO: Warn if the hexsha forces the tracking branch off the remote
+ # branch - this should be prevented when setting the branch option
+ mrepo.head.reset(hexsha, index=True, working_tree=True)
+ # END handle checkout
+ #END handle dry_run
+ progress.update(END|UPDWKTREE, 0, 1, prefix+"Done updating working tree for submodule %r" % self.name)
+ # END update to new commit only if needed
+
+ # HANDLE RECURSION
+ ##################
+ if recursive:
+ # in dry_run mode, the module might not exist
+ if mrepo is not None:
+ for submodule in self.iter_items(self.module()):
+ submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run)
+ # END handle recursive update
+ #END handle dry run
+ # END for each submodule
+
+ return self
+
+ @unbare_repo
+ def move(self, module_path, configuration=True, module=True):
+ """Move the submodule to a another module path. This involves physically moving
+ the repository at our current path, changing the configuration, as well as
+ adjusting our index entry accordingly.
+
+ :param module_path: the path to which to move our module, given as
+ repository-relative path. Intermediate directories will be created
+ accordingly. If the path already exists, it must be empty.
+			Trailing (back)slashes are removed automatically
+ :param configuration: if True, the configuration will be adjusted to let
+ the submodule point to the given path.
+ :param module: if True, the repository managed by this submodule
+ will be moved, not the configuration. This will effectively
+ leave your repository in an inconsistent state unless the configuration
+ and index already point to the target location.
+ :return: self
+ :raise ValueError: if the module path existed and was not empty, or was a file
+ :note: Currently the method is not atomic, and it could leave the repository
+ in an inconsistent state if a sub-step fails for some reason
+ """
+ if module + configuration < 1:
+ raise ValueError("You must specify to move at least the module or the configuration of the submodule")
+ #END handle input
+
+ module_path = to_native_path_linux(module_path)
+ if module_path.endswith('/'):
+ module_path = module_path[:-1]
+ # END handle trailing slash
+
+ # VERIFY DESTINATION
+ if module_path == self.path:
+ return self
+ #END handle no change
+
+ dest_path = join_path_native(self.repo.working_tree_dir, module_path)
+ if os.path.isfile(dest_path):
+ raise ValueError("Cannot move repository onto a file: %s" % dest_path)
+ # END handle target files
+
+ index = self.repo.index
+ tekey = index.entry_key(module_path, 0)
+ # if the target item already exists, fail
+ if configuration and tekey in index.entries:
+			raise ValueError("Index entry for target path did already exist")
+ #END handle index key already there
+
+ # remove existing destination
+ if module:
+ if os.path.exists(dest_path):
+ if len(os.listdir(dest_path)):
+ raise ValueError("Destination module directory was not empty")
+				#END handle non-emptiness
+
+ if os.path.islink(dest_path):
+ os.remove(dest_path)
+ else:
+ os.rmdir(dest_path)
+ #END handle link
+ else:
+ # recreate parent directories
+ # NOTE: renames() does that now
+ pass
+			#END handle existence
+ # END handle module
+
+ # move the module into place if possible
+ cur_path = self.abspath
+ renamed_module = False
+ if module and os.path.exists(cur_path):
+ os.renames(cur_path, dest_path)
+ renamed_module = True
+ #END move physical module
+
+
+ # rename the index entry - have to manipulate the index directly as
+ # git-mv cannot be used on submodules ... yeah
+ try:
+ if configuration:
+ try:
+ ekey = index.entry_key(self.path, 0)
+ entry = index.entries[ekey]
+ del(index.entries[ekey])
+ nentry = git.IndexEntry(entry[:3]+(module_path,)+entry[4:])
+ index.entries[tekey] = nentry
+ except KeyError:
+ raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path))
+ #END handle submodule doesn't exist
+
+ # update configuration
+ writer = self.config_writer(index=index) # auto-write
+ writer.set_value('path', module_path)
+ self.path = module_path
+ del(writer)
+ # END handle configuration flag
+ except Exception:
+ if renamed_module:
+ os.renames(dest_path, cur_path)
+ # END undo module renaming
+ raise
+ #END handle undo rename
+
+ return self
+
+ @unbare_repo
+ def remove(self, module=True, force=False, configuration=True, dry_run=False):
+ """Remove this submodule from the repository. This will remove our entry
+ from the .gitmodules file and the entry in the .git/config file.
+
+ :param module: If True, the module we point to will be deleted
+ as well. If the module is currently on a commit which is not part
+			of any branch in the remote, if the currently checked out branch
+			is ahead of its tracking branch, or if you have modifications in the
+			working tree or untracked files, the removal of the repository will fail.
+			In that case the submodule status will not have been altered.
+			If this submodule has child-modules of its own, these will be deleted
+			prior to touching the module itself.
+ :param force: Enforces the deletion of the module even though it contains
+ modifications. This basically enforces a brute-force file system based
+ deletion.
+ :param configuration: if True, the submodule is deleted from the configuration,
+ otherwise it isn't. Although this should be enabled most of the times,
+ this flag enables you to safely delete the repository of your submodule.
+ :param dry_run: if True, we will not actually do anything, but throw the errors
+ we would usually throw
+ :return: self
+ :note: doesn't work in bare repositories
+ :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted
+ :raise OSError: if directories or files could not be removed"""
+ if not (module + configuration):
+ raise ValueError("Need to specify to delete at least the module, or the configuration")
+ # END handle params
+
+ # DELETE MODULE REPOSITORY
+ ##########################
+ if module and self.module_exists():
+ if force:
+ # take the fast lane and just delete everything in our module path
+ # TODO: If we run into permission problems, we have a highly inconsistent
+ # state. Delete the .git folders last, start with the submodules first
+ mp = self.abspath
+ method = None
+ if os.path.islink(mp):
+ method = os.remove
+ elif os.path.isdir(mp):
+ method = shutil.rmtree
+ elif os.path.exists(mp):
+ raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory")
+ #END handle brutal deletion
+ if not dry_run:
+ assert method
+ method(mp)
+ #END apply deletion method
+ else:
+ # verify we may delete our module
+ mod = self.module()
+ if mod.is_dirty(untracked_files=True):
+ raise InvalidGitRepositoryError("Cannot delete module at %s with any modifications, unless force is specified" % mod.working_tree_dir)
+ # END check for dirt
+
+ # figure out whether we have new commits compared to the remotes
+ # NOTE: If the user pulled all the time, the remote heads might
+ # not have been updated, so commits coming from the remote look
+ # as if they come from us. But we stay strictly read-only and
+				# don't fetch beforehand.
+ for remote in mod.remotes:
+ num_branches_with_new_commits = 0
+ rrefs = remote.refs
+ for rref in rrefs:
+ num_branches_with_new_commits = len(mod.git.cherry(rref)) != 0
+ # END for each remote ref
+ # not a single remote branch contained all our commits
+ if num_branches_with_new_commits == len(rrefs):
+ raise InvalidGitRepositoryError("Cannot delete module at %s as there are new commits" % mod.working_tree_dir)
+ # END handle new commits
+				# have to manually delete references as python's scoping is
+				# non-existent; they could keep handles open (on Windows this is a problem)
+ if len(rrefs):
+ del(rref)
+ #END handle remotes
+ del(rrefs)
+ del(remote)
+ # END for each remote
+
+ # gently remove all submodule repositories
+ for sm in self.children():
+ sm.remove(module=True, force=False, configuration=False, dry_run=dry_run)
+ del(sm)
+ # END for each child-submodule
+
+ # finally delete our own submodule
+ if not dry_run:
+ wtd = mod.working_tree_dir
+ del(mod) # release file-handles (windows)
+ shutil.rmtree(wtd)
+ # END delete tree if possible
+ # END handle force
+ # END handle module deletion
+
+ # DELETE CONFIGURATION
+ ######################
+ if configuration and not dry_run:
+ # first the index-entry
+ index = self.repo.index
+ try:
+ del(index.entries[index.entry_key(self.path, 0)])
+ except KeyError:
+ pass
+ #END delete entry
+ index.write()
+
+ # now git config - need the config intact, otherwise we can't query
+			# information anymore
+ self.repo.config_writer().remove_section(sm_section(self.name))
+ self.config_writer().remove_section()
+ # END delete configuration
+
+ # void our data not to delay invalid access
+ self._clear_cache()
+
+ return self
+
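+ # Usage sketch (hypothetical; 'repo' is a non-bare Repo containing submodules):
+ # sm = next(Submodule.iter_items(repo))
+ # sm.remove(module=True, configuration=True, dry_run=True)  # only perform the checks
+ # sm.remove(module=True, configuration=True)                # delete checkout and config
+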
+ def set_parent_commit(self, commit, check=True):
+ """Set this instance to use the given commit whose tree is supposed to
+ contain the .gitmodules blob.
+
+ :param commit: Commit'ish reference pointing at the root_tree
+ :param check: if True, relatively expensive checks will be performed to verify
+ validity of the submodule.
+ :raise ValueError: if the commit's tree didn't contain the .gitmodules blob.
+ :raise ValueError: if the parent commit didn't store this submodule under the
+ current path
+ :return: self"""
+ pcommit = self.repo.commit(commit)
+ pctree = pcommit.tree
+ if self.k_modules_file not in pctree:
+ raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file))
+ # END handle exceptions
+
+ prev_pc = self._parent_commit
+ self._parent_commit = pcommit
+
+ if check:
+ parser = self._config_parser(self.repo, self._parent_commit, read_only=True)
+ if not parser.has_section(sm_section(self.name)):
+ self._parent_commit = prev_pc
+ raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit))
+ # END handle submodule did not exist
+ # END handle checking mode
+
+ # update our sha, it could have changed
+ self.binsha = pctree[self.path].binsha
+
+ self._clear_cache()
+
+ return self
+
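+ # Example (hypothetical): read the submodule as recorded one commit earlier
+ # sm.set_parent_commit('HEAD~1')
+ # print sm.url, sm.hexsha   # values now reflect the older .gitmodules and tree
+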
+ @unbare_repo
+ def config_writer(self, index=None, write=True):
+ """:return: a config writer instance allowing you to read and write the data
+ belonging to this submodule into the .gitmodules file.
+
+ :param index: if not None, an IndexFile instance which should be written.
+ defaults to the index of the Submodule's parent repository.
+ :param write: if True, the index will be written each time a configuration
+ value changes.
+ :note: the parameters allow for a more efficient writing of the index,
+ as you can pass in a modified index on your own, prevent automatic writing,
+ and write yourself once the whole operation is complete
+ :raise ValueError: if trying to get a writer on a parent_commit which does not
+ match the current head commit
+ :raise IOError: If the .gitmodules file/blob could not be read"""
+ writer = self._config_parser_constrained(read_only=False)
+ if index is not None:
+ writer.config._index = index
+ writer.config._auto_write = write
+ return writer
+
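+ # Usage sketch (hypothetical url): adjust the submodule's url in .gitmodules
+ # writer = sm.config_writer()
+ # writer.set_value('url', 'git://example.com/lib.git')
+ # del(writer)   # write and flush the changed blob into the index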
+ #} END edit interface
+
+ #{ Query Interface
+
+ @unbare_repo
+ def module(self):
+ """:return: Repo instance initialized from the repository at our submodule path
+ :raise InvalidGitRepositoryError: if a repository was not available. This could
+ also mean that it was not yet initialized"""
+ # late import to workaround circular dependencies
+ module_path = self.abspath
+ try:
+ repo = git.Repo(module_path)
+ if repo != self.repo:
+ return repo
+ # END handle repo uninitialized
+ except (InvalidGitRepositoryError, NoSuchPathError):
+ raise InvalidGitRepositoryError("No valid repository at %s" % self.path)
+ else:
+ raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % module_path)
+ # END handle exceptions
+
+ def module_exists(self):
+ """:return: True if our module exists and is a valid git repository. See module() method"""
+ try:
+ self.module()
+ return True
+ except Exception:
+ return False
+ # END handle exception
+
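+ # Example (hypothetical): work with the submodule's own repository if present
+ # if sm.module_exists():
+ #     sm_repo = sm.module()   # git.Repo rooted at the submodule checkout
+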
+ def exists(self):
+ """
+ :return: True if the submodule exists, False otherwise. Please note that
+ a submodule may exist (in the .gitmodules file) even though its module
+ doesn't exist"""
+ # keep attributes for later, and restore them if we have no valid data
+ # this way we do not actually alter the state of the object
+ loc = locals()
+ for attr in self._cache_attrs:
+ if hasattr(self, attr):
+ loc[attr] = getattr(self, attr)
+ # END if we have the attribute cache
+ #END for each attr
+ self._clear_cache()
+
+ try:
+ try:
+ self.path
+ return True
+ except Exception:
+ return False
+ # END handle exceptions
+ finally:
+ for attr in self._cache_attrs:
+ if attr in loc:
+ setattr(self, attr, loc[attr])
+ # END if we have a cache
+ # END reapply each attribute
+ # END handle object state consistency
+
+ @property
+ def branch(self):
+ """:return: The branch instance that we are to checkout
+ :raise InvalidGitRepositoryError: if our module is not yet checked out"""
+ return mkhead(self.module(), self._branch_path)
+
+ @property
+ def branch_path(self):
+ """
+ :return: full (relative) path as string to the branch we would checkout
+ from the remote and track"""
+ return self._branch_path
+
+ @property
+ def branch_name(self):
+ """:return: the name of the branch, which is the shortest possible branch name"""
+ # use an instance method; for this we create a temporary Head instance
+ # which uses whichever repository is available ( it makes no difference )
+ return git.Head(self.repo, self._branch_path).name
+
+ @property
+ def url(self):
+ """:return: The url to the repository which our module-repository refers to"""
+ return self._url
+
+ @property
+ def parent_commit(self):
+ """:return: Commit instance with the tree containing the .gitmodules file
+ :note: will always point to the current head's commit if it was not set explicitly"""
+ return self._parent_commit
+
+ @property
+ def name(self):
+ """:return: The name of this submodule. It is used to identify it within the
+ .gitmodules file.
+ :note: by default, the name is the path at which to find the submodule, but
+ in git-python it should be a unique identifier similar to the identifiers
+ used for remotes, which allows changing the path of the submodule
+ easily
+ """
+ return self._name
+
+ def config_reader(self):
+ """
+ :return: ConfigReader instance which allows you to query the configuration values
+ of this submodule, as provided by the .gitmodules file
+ :note: The config reader will actually read the data directly from the repository
+ and thus does not need nor care about your working tree.
+ :note: Should be cached by the caller and only kept as long as needed
+ :raise IOError: If the .gitmodules file/blob could not be read"""
+ return self._config_parser_constrained(read_only=True)
+
+ def children(self):
+ """
+ :return: IterableList(Submodule, ...) an iterable list of submodule instances
+ which are children of this submodule, or an empty list if the submodule is not checked out
+ return self._get_intermediate_items(self)
+
+ #} END query interface
+
+ #{ Iterable Interface
+
+ @classmethod
+ def iter_items(cls, repo, parent_commit='HEAD'):
+ """:return: iterator yielding Submodule instances available in the given repository"""
+ pc = repo.commit(parent_commit) # parent commit instance
+ try:
+ parser = cls._config_parser(repo, pc, read_only=True)
+ except IOError:
+ raise StopIteration
+ # END handle empty iterator
+
+ rt = pc.tree # root tree
+
+ for sms in parser.sections():
+ n = sm_name(sms)
+ p = parser.get_value(sms, 'path')
+ u = parser.get_value(sms, 'url')
+ b = cls.k_head_default
+ if parser.has_option(sms, cls.k_head_option):
+ b = parser.get_value(sms, cls.k_head_option)
+ # END handle optional information
+
+ # get the binsha
+ index = repo.index
+ try:
+ sm = rt[p]
+ except KeyError:
+ # try the index, maybe it was just added
+ try:
+ entry = index.entries[index.entry_key(p, 0)]
+ sm = cls(repo, entry.binsha, entry.mode, entry.path)
+ except KeyError:
+ raise InvalidGitRepositoryError("Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit))
+ # END handle keyerror
+ # END handle critical error
+
+ # fill in remaining info - saves time as it doesn't have to be parsed again
+ sm._name = n
+ sm._parent_commit = pc
+ sm._branch_path = git.Head.to_full_path(b)
+ sm._url = u
+
+ yield sm
+ # END for each section
+
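+ # Example (hypothetical): list all submodules recorded in the current HEAD
+ # for sm in Submodule.iter_items(repo):
+ #     print sm.name, sm.path, sm.url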
+ #} END iterable interface
+
diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py
new file mode 100644
index 00000000..36cd7209
--- /dev/null
+++ b/git/objects/submodule/root.py
@@ -0,0 +1,315 @@
+from base import Submodule, UpdateProgress
+from util import (
+ find_first_remote_branch
+ )
+from git.exc import InvalidGitRepositoryError
+import git
+
+import sys
+
+__all__ = ["RootModule", "RootUpdateProgress"]
+
+
+class RootUpdateProgress(UpdateProgress):
+ """Utility class which adds more opcodes to the UpdateProgress"""
+ REMOVE, PATHCHANGE, BRANCHCHANGE, URLCHANGE = [1 << x for x in range(UpdateProgress._num_op_codes, UpdateProgress._num_op_codes+4)]
+ _num_op_codes = UpdateProgress._num_op_codes+4
+
+ __slots__ = tuple()
+
+BEGIN = RootUpdateProgress.BEGIN
+END = RootUpdateProgress.END
+REMOVE = RootUpdateProgress.REMOVE
+BRANCHCHANGE = RootUpdateProgress.BRANCHCHANGE
+URLCHANGE = RootUpdateProgress.URLCHANGE
+PATHCHANGE = RootUpdateProgress.PATHCHANGE
+
+class RootModule(Submodule):
+ """A (virtual) Root of all submodules in the given repository. It can be used
+ to more easily traverse all submodules of the master repository"""
+
+ __slots__ = tuple()
+
+ k_root_name = '__ROOT__'
+
+ def __init__(self, repo):
+ # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None)
+ super(RootModule, self).__init__(
+ repo,
+ binsha = self.NULL_BIN_SHA,
+ mode = self.k_default_mode,
+ path = '',
+ name = self.k_root_name,
+ parent_commit = repo.head.commit,
+ url = '',
+ branch_path = git.Head.to_full_path(self.k_head_default)
+ )
+
+
+ def _clear_cache(self):
+ """May not do anything"""
+ pass
+
+ #{ Interface
+
+ def update(self, previous_commit=None, recursive=True, force_remove=False, init=True,
+ to_latest_revision=False, progress=None, dry_run=False):
+ """Update the submodules of this repository to the current HEAD commit.
+ This method behaves smartly by determining changes of the path of a submodule's
+ repository, next to changes to the to-be-checked-out commit or the branch to be
+ checked out. This works if the submodule's ID does not change.
+ Additionally it will detect addition and removal of submodules, which will be handled
+ gracefully.
+
+ :param previous_commit: If set to a commit'ish, the commit we should use
+ as the previous commit the HEAD pointed to before it was set to the commit it points to now.
+ If None, it defaults to HEAD@{1}.
+ :param recursive: if True, the children of submodules will be updated as well
+ using the same technique
+ :param force_remove: If submodules have been deleted, they will be forcibly removed.
+ Otherwise the update may fail if a submodule's repository cannot be deleted as
+ changes have been made to it (see Submodule.update() for more information)
+ :param init: If we encounter a new module which would need to be initialized, then do it.
+ :param to_latest_revision: If True, instead of checking out the revision pointed to
+ by this submodule's sha, the checked out tracking branch will be merged with the
+ newest remote branch fetched from the repository's origin
+ :param progress: RootUpdateProgress instance or None if no progress should be sent
+ :param dry_run: if True, operations will not actually be performed. Progress messages
+ will change accordingly to indicate the WOULD DO state of the operation."""
+ if self.repo.bare:
+ raise InvalidGitRepositoryError("Cannot update submodules in bare repositories")
+ # END handle bare
+
+ if progress is None:
+ progress = RootUpdateProgress()
+ #END assure progress is set
+
+ prefix = ''
+ if dry_run:
+ prefix = 'DRY-RUN: '
+
+ repo = self.repo
+
+ # SETUP BASE COMMIT
+ ###################
+ cur_commit = repo.head.commit
+ if previous_commit is None:
+ try:
+ previous_commit = repo.commit(repo.head.log_entry(-1).oldhexsha)
+ if previous_commit.binsha == previous_commit.NULL_BIN_SHA:
+ raise IndexError
+ #END handle initial commit
+ except IndexError:
+ # in new repositories, there is no previous commit
+ previous_commit = cur_commit
+ #END exception handling
+ else:
+ previous_commit = repo.commit(previous_commit) # obtain commit object
+ # END handle previous commit
+
+
+ psms = self.list_items(repo, parent_commit=previous_commit)
+ sms = self.list_items(self.module())
+ spsms = set(psms)
+ ssms = set(sms)
+
+ # HANDLE REMOVALS
+ ###################
+ rrsm = (spsms - ssms)
+ len_rrsm = len(rrsm)
+ for i, rsm in enumerate(rrsm):
+ op = REMOVE
+ if i == 0:
+ op |= BEGIN
+ #END handle begin
+
+ # fake it into thinking it's at the current commit to allow deletion
+ # of previous module. Trigger the cache to be updated before that
+ progress.update(op, i, len_rrsm, prefix+"Removing submodule %r at %s" % (rsm.name, rsm.abspath))
+ rsm._parent_commit = repo.head.commit
+ if not dry_run:
+ rsm.remove(configuration=False, module=True, force=force_remove)
+ #END handle dry-run
+
+ if i == len_rrsm-1:
+ op |= END
+ #END handle end
+ progress.update(op, i, len_rrsm, prefix+"Done removing submodule %r" % rsm.name)
+ # END for each removed submodule
+
+ # HANDLE PATH RENAMES
+ #####################
+ # url changes + branch changes
+ csms = (spsms & ssms)
+ len_csms = len(csms)
+ for i, csm in enumerate(csms):
+ psm = psms[csm.name]
+ sm = sms[csm.name]
+
+ #PATH CHANGES
+ ##############
+ if sm.path != psm.path and psm.module_exists():
+ progress.update(BEGIN|PATHCHANGE, i, len_csms, prefix+"Moving repository of submodule %r from %s to %s" % (sm.name, psm.abspath, sm.abspath))
+ # move the module to the new path
+ if not dry_run:
+ psm.move(sm.path, module=True, configuration=False)
+ #END handle dry_run
+ progress.update(END|PATHCHANGE, i, len_csms, prefix+"Done moving repository of submodule %r" % sm.name)
+ # END handle path changes
+
+ if sm.module_exists():
+ # HANDLE URL CHANGE
+ ###################
+ if sm.url != psm.url:
+ # Add the new remote, remove the old one
+ # This way, if the url just changes, the commits will not
+ # have to be re-retrieved
+ nn = '__new_origin__'
+ smm = sm.module()
+ rmts = smm.remotes
+
+ # don't do anything if we already have the url we search in place
+ if len([r for r in rmts if r.url == sm.url]) == 0:
+ progress.update(BEGIN|URLCHANGE, i, len_csms, prefix+"Changing url of submodule %r from %s to %s" % (sm.name, psm.url, sm.url))
+
+ if not dry_run:
+ assert nn not in [r.name for r in rmts]
+ smr = smm.create_remote(nn, sm.url)
+ smr.fetch(progress=progress)
+
+ # If we have a tracking branch, it should be available
+ # in the new remote as well.
+ if len([r for r in smr.refs if r.remote_head == sm.branch_name]) == 0:
+ raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch_name, sm.url))
+ # END head is not detached
+
+ # now delete the changed one
+ rmt_for_deletion = None
+ for remote in rmts:
+ if remote.url == psm.url:
+ rmt_for_deletion = remote
+ break
+ # END if urls match
+ # END for each remote
+
+ # if we didn't find a matching remote, but have exactly one,
+ # we can safely use this one
+ if rmt_for_deletion is None:
+ if len(rmts) == 1:
+ rmt_for_deletion = rmts[0]
+ else:
+ # if we have not found any remote with the original url
+ # we may not have a name. This is a special case,
+ # and it's okay to fail here
+ # Alternatively we could just generate a unique name and leave all
+ # existing ones in place
+ raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url)
+ #END handle one single remote
+ # END handle check we found a remote
+
+ orig_name = rmt_for_deletion.name
+ smm.delete_remote(rmt_for_deletion)
+ # NOTE: Currently we leave tags from the deleted remotes
+ # as well as separate tracking branches in the possibly totally
+ # changed repository ( someone could have changed the url to
+ # another project ). At some point, one might want to clean
+ # it up, but the danger is high to remove stuff the user
+ # has added explicitly
+
+ # rename the new remote back to what it was
+ smr.rename(orig_name)
+
+ # early on, we verified that our current tracking branch
+ # exists in the remote. Now we have to assure that the
+ # sha we point to is still contained in the new remote
+ # tracking branch.
+ smsha = sm.binsha
+ found = False
+ rref = smr.refs[sm.branch_name]
+ for c in rref.commit.traverse():
+ if c.binsha == smsha:
+ found = True
+ break
+ # END traverse all commits in search for sha
+ # END for each commit
+
+ if not found:
+ # adjust our internal binsha to use the one of the remote
+ # this way, it will be checked out in the next step
+ # This will change the submodule relative to us, so
+ # the user will be able to commit the change easily
+ print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it to the remote's tracking branch" % sm.hexsha
+ sm.binsha = rref.commit.binsha
+ #END reset binsha
+
+ #NOTE: All checkout is performed by the base implementation of update
+ #END handle dry_run
+ progress.update(END|URLCHANGE, i, len_csms, prefix+"Done adjusting url of submodule %r" % (sm.name))
+ # END skip remote handling if new url already exists in module
+ # END handle url
+
+ # HANDLE BRANCH CHANGES
+ #######################
+ if sm.branch_path != psm.branch_path:
+ # finally, create a new tracking branch which tracks the
+ # new remote branch
+ progress.update(BEGIN|BRANCHCHANGE, i, len_csms, prefix+"Changing branch of submodule %r from %s to %s" % (sm.name, psm.branch_path, sm.branch_path))
+ if not dry_run:
+ smm = sm.module()
+ smmr = smm.remotes
+ try:
+ tbr = git.Head.create(smm, sm.branch_name, logmsg='branch: Created from HEAD')
+ except OSError:
+ # ... or reuse the existing one
+ tbr = git.Head(smm, sm.branch_path)
+ #END assure tracking branch exists
+
+ tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name))
+ # figure out whether the previous tracking branch contains
+ # new commits compared to the other one, if not we can
+ # delete it.
+ try:
+ tbr = find_first_remote_branch(smmr, psm.branch_name)
+ if len(smm.git.cherry(tbr, psm.branch)) == 0:
+ psm.branch.delete(smm, psm.branch)
+ #END delete original tracking branch if there are no changes
+ except InvalidGitRepositoryError:
+ # ignore it if the previous branch couldn't be found in the
+ # current remotes, this just means we can't handle it
+ pass
+ # END exception handling
+
+ #NOTE: All checkout is done in the base implementation of update
+ #END handle dry_run
+
+ progress.update(END|BRANCHCHANGE, i, len_csms, prefix+"Done changing branch of submodule %r" % sm.name)
+ #END handle branch
+ #END handle
+ # END for each common submodule
+
+ # FINALLY UPDATE ALL ACTUAL SUBMODULES
+ ######################################
+ for sm in sms:
+ # update the submodule using the default method
+ sm.update(recursive=False, init=init, to_latest_revision=to_latest_revision,
+ progress=progress, dry_run=dry_run)
+
+ # update recursively depth first - the question is which inconsistent
+ # state will be better in case it fails somewhere. Defective branch
+ # or defective depth. The RootModule type will never process itself,
+ # which was already done by the call above
+ if recursive:
+ # the module would exist by now if we are not in dry_run mode
+ if sm.module_exists():
+ type(self)(sm.module()).update( recursive=True, force_remove=force_remove,
+ init=init, to_latest_revision=to_latest_revision,
+ progress=progress, dry_run=dry_run)
+ #END handle dry_run
+ #END handle recursive
+ # END for each submodule to update
+
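+ # Usage sketch (hypothetical): bring all submodules in line after a branch switch
+ # rm = RootModule(repo)
+ # rm.update(recursive=True, to_latest_revision=False, progress=None, dry_run=False)
+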
+ def module(self):
+ """:return: the actual repository containing the submodules"""
+ return self.repo
+ #} END interface
+#} END classes
diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py
new file mode 100644
index 00000000..9b32807a
--- /dev/null
+++ b/git/objects/submodule/util.py
@@ -0,0 +1,101 @@
+import git
+from git.exc import InvalidGitRepositoryError
+from git.config import GitConfigParser
+from StringIO import StringIO
+import weakref
+
+__all__ = ( 'sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch',
+ 'SubmoduleConfigParser')
+
+#{ Utilities
+
+def sm_section(name):
+ """:return: section title used in .gitmodules configuration file"""
+ return 'submodule "%s"' % name
+
+def sm_name(section):
+ """:return: name of the submodule as parsed from the section name"""
+ section = section.strip()
+ return section[11:-1]
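+
+# Examples: sm_section('lib') returns 'submodule "lib"', and
+# sm_name('submodule "lib"') returns 'lib' ('lib' being any submodule name)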
+
+def mkhead(repo, path):
+ """:return: New branch/head instance"""
+ return git.Head(repo, git.Head.to_full_path(path))
+
+def unbare_repo(func):
+ """Methods with this decorator raise InvalidGitRepositoryError if they
+ encounter a bare repository"""
+ def wrapper(self, *args, **kwargs):
+ if self.repo.bare:
+ raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__)
+ #END bare method
+ return func(self, *args, **kwargs)
+ # END wrapper
+ wrapper.__name__ = func.__name__
+ return wrapper
+
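+# Example (hypothetical): any method decorated like this refuses bare repositories
+# @unbare_repo
+# def some_worktree_method(self):
+#     ...
+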
+def find_first_remote_branch(remotes, branch_name):
+ """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError"""
+ for remote in remotes:
+ try:
+ return remote.refs[branch_name]
+ except IndexError:
+ continue
+ # END exception handling
+ #END for remote
+ raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes" % branch_name)
+
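+# Example (hypothetical): locate a submodule's tracking branch among all remotes
+# rref = find_first_remote_branch(sm.module().remotes, sm.branch_name)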
+#} END utilities
+
+
+#{ Classes
+
+class SubmoduleConfigParser(GitConfigParser):
+ """
+ Catches calls to _write, and updates the .gitmodules blob in the index
+ with the new data, if we have written into a stream. Otherwise it will
+ add the local file to the index to make it correspond with the working tree.
+ Additionally, the cache must be cleared
+
+ Please note that no mutating method will work in bare mode
+ """
+
+ def __init__(self, *args, **kwargs):
+ self._smref = None
+ self._index = None
+ self._auto_write = True
+ super(SubmoduleConfigParser, self).__init__(*args, **kwargs)
+
+ #{ Interface
+ def set_submodule(self, submodule):
+ """Set this instance's submodule. It must be called before
+ the first write operation begins"""
+ self._smref = weakref.ref(submodule)
+
+ def flush_to_index(self):
+ """Flush changes in our configuration file to the index"""
+ assert self._smref is not None
+ # should always have a file here
+ assert not isinstance(self._file_or_files, StringIO)
+
+ sm = self._smref()
+ if sm is not None:
+ index = self._index
+ if index is None:
+ index = sm.repo.index
+ # END handle index
+ index.add([sm.k_modules_file], write=self._auto_write)
+ sm._clear_cache()
+ # END handle weakref
+
+ #} END interface
+
+ #{ Overridden Methods
+ def write(self):
+ rval = super(SubmoduleConfigParser, self).write()
+ self.flush_to_index()
+ return rval
+ # END overridden methods
+
+
+#} END classes
diff --git a/git/objects/tag.py b/git/objects/tag.py
new file mode 100644
index 00000000..c7d02abe
--- /dev/null
+++ b/git/objects/tag.py
@@ -0,0 +1,76 @@
+# objects.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+""" Module containing all object based types. """
+import base
+from gitdb.util import hex_to_bin
+from util import (
+ get_object_type_by_name,
+ parse_actor_and_date
+ )
+
+__all__ = ("TagObject", )
+
+class TagObject(base.Object):
+ """Non-Lightweight tag carrying additional information about an object we are pointing to."""
+ type = "tag"
+ __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
+
+ def __init__(self, repo, binsha, object=None, tag=None,
+ tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
+ """Initialize a tag object with additional data
+
+ :param repo: repository this object is located in
+ :param binsha: 20 byte SHA1
+ :param object: Object instance of object we are pointing to
+ :param tag: name of this tag
+ :param tagger: Actor identifying the tagger
+ :param tagged_date: int_seconds_since_epoch
+ is the DateTime of the tag creation - use time.gmtime to convert
+ it into a different format
+ :param tagger_tz_offset: int_seconds_west_of_utc is the timezone that the
+ tagged_date is in, in a format similar to time.altzone"""
+ super(TagObject, self).__init__(repo, binsha )
+ if object is not None:
+ self.object = object
+ if tag is not None:
+ self.tag = tag
+ if tagger is not None:
+ self.tagger = tagger
+ if tagged_date is not None:
+ self.tagged_date = tagged_date
+ if tagger_tz_offset is not None:
+ self.tagger_tz_offset = tagger_tz_offset
+ if message is not None:
+ self.message = message
+
+ def _set_cache_(self, attr):
+ """Cache all our attributes at once"""
+ if attr in TagObject.__slots__:
+ ostream = self.repo.odb.stream(self.binsha)
+ lines = ostream.read().splitlines()
+
+ obj, hexsha = lines[0].split(" ") # object <hexsha>
+ type_token, type_name = lines[1].split(" ") # type <type_name>
+ self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
+
+ self.tag = lines[2][4:] # tag <tag name>
+
+ tagger_info = lines[3][7:]# tagger <actor> <date>
+ self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
+
+ # line 4 empty - it could mark the beginning of the next header
+ # in case there really is no message, it would not exist. Otherwise
+ # a newline separates header from message
+ if len(lines) > 5:
+ self.message = "\n".join(lines[5:])
+ else:
+ self.message = ''
+ # END check our attributes
+ else:
+ super(TagObject, self)._set_cache_(attr)
+
+
+
diff --git a/git/objects/tree.py b/git/objects/tree.py
new file mode 100644
index 00000000..67431686
--- /dev/null
+++ b/git/objects/tree.py
@@ -0,0 +1,280 @@
+# tree.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+import util
+from base import IndexObject
+from git.util import join_path
+from blob import Blob
+from submodule.base import Submodule
+import git.diff as diff
+
+from fun import (
+ tree_entries_from_data,
+ tree_to_stream
+ )
+
+from gitdb.util import (
+ to_bin_sha,
+ )
+
+__all__ = ("TreeModifier", "Tree")
+
+class TreeModifier(object):
+ """A utility class providing methods to alter the underlying cache in a list-like fashion.
+
+ Once all adjustments are complete, the _cache, which really is a reference to
+ the cache of a tree, will be sorted, assuring it is in a serializable state"""
+ __slots__ = '_cache'
+
+ def __init__(self, cache):
+ self._cache = cache
+
+ def _index_by_name(self, name):
+ """:return: index of an item with name, or -1 if not found"""
+ for i, t in enumerate(self._cache):
+ if t[2] == name:
+ return i
+ # END found item
+ # END for each item in cache
+ return -1
+
+ #{ Interface
+ def set_done(self):
+ """Call this method once you are done modifying the tree information.
+ It may be called several times, but be aware that each call will cause
+ a sort operation
+ :return: self"""
+ self._cache.sort(key=lambda t: t[2]) # sort by name
+ return self
+ #} END interface
+
+ #{ Mutators
+ def add(self, sha, mode, name, force=False):
+ """Add the given item to the tree. If an item with the given name already
+ exists, nothing will be done, but a ValueError will be raised if the
+ sha and mode of the existing item do not match the one you add, unless
+ force is True
+
+ :param sha: The 20 or 40 byte sha of the item to add
+ :param mode: int representing the stat compatible mode of the item
+ :param force: If True, an item with the given name and information will overwrite
+ any existing item with the same name, no matter which information it has
+ :return: self"""
+ if '/' in name:
+ raise ValueError("Name must not contain '/' characters")
+ if (mode >> 12) not in Tree._map_id_to_type:
+ raise ValueError("Invalid object type according to mode %o" % mode)
+
+ sha = to_bin_sha(sha)
+ index = self._index_by_name(name)
+ item = (sha, mode, name)
+ if index == -1:
+ self._cache.append(item)
+ else:
+ if force:
+ self._cache[index] = item
+ else:
+ ex_item = self._cache[index]
+ if ex_item[0] != sha or ex_item[1] != mode:
+ raise ValueError("Item %r existed with different properties" % name)
+ # END handle mismatch
+ # END handle force
+ # END handle name exists
+ return self
+
+ def add_unchecked(self, binsha, mode, name):
+ """Add the given item to the tree, its correctness is assumed, which
+ puts the caller into responsibility to assure the input is correct.
+ For more information on the parameters, see ``add``
+ :param binsha: 20 byte binary sha"""
+ self._cache.append((binsha, mode, name))
+
+ def __delitem__(self, name):
+ """Deletes an item with the given name if it exists"""
+ index = self._index_by_name(name)
+ if index > -1:
+ del(self._cache[index])
+
+ #} END mutators
+
+
+class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
+ """Tree objects represent an ordered list of Blobs and other Trees.
+
+ ``Tree as a list``::
+
+ Access a specific blob using the
+ tree['filename'] notation.
+
+ You may as well access by index
+ blob = tree[0]
+ """
+
+ type = "tree"
+ __slots__ = "_cache"
+
+ # actual integer ids for comparison
+ commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link
+ blob_id = 010
+ symlink_id = 012
+ tree_id = 004
+
+ _map_id_to_type = {
+ commit_id : Submodule,
+ blob_id : Blob,
+ symlink_id : Blob
+ # tree id added once Tree is defined
+ }
+
+
+ def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
+ super(Tree, self).__init__(repo, binsha, mode, path)
+
+ @classmethod
+ def _get_intermediate_items(cls, index_object):
+ if index_object.type == "tree":
+ return tuple(index_object._iter_convert_to_object(index_object._cache))
+ return tuple()
+
+ def _set_cache_(self, attr):
+ if attr == "_cache":
+ # Set the data when we need it
+ ostream = self.repo.odb.stream(self.binsha)
+ self._cache = tree_entries_from_data(ostream.read())
+ else:
+ super(Tree, self)._set_cache_(attr)
+ # END handle attribute
+
+ def _iter_convert_to_object(self, iterable):
+ """Iterable yields tuples of (binsha, mode, name), which will be converted
+ to the respective object representation"""
+ for binsha, mode, name in iterable:
+ path = join_path(self.path, name)
+ try:
+ yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path)
+ except KeyError:
+ raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
+ # END for each item
+
+ def __div__(self, file):
+ """Find the named object in this tree's contents
+ :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
+
+ :raise KeyError: if given file or tree does not exist in tree"""
+ msg = "Blob or Tree named %r not found"
+ if '/' in file:
+ tree = self
+ item = self
+ tokens = file.split('/')
+ for i,token in enumerate(tokens):
+ item = tree[token]
+ if item.type == 'tree':
+ tree = item
+ else:
+ # safety assertion - blobs are at the end of the path
+ if i != len(tokens)-1:
+ raise KeyError(msg % file)
+ return item
+ # END handle item type
+ # END for each token of split path
+ if item == self:
+ raise KeyError(msg % file)
+ return item
+ else:
+ for info in self._cache:
+ if info[2] == file: # [2] == name
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
+ # END for each obj
+ raise KeyError( msg % file )
+ # END handle long paths
+
+
+ @property
+ def trees(self):
+ """:return: list(Tree, ...) list of trees directly below this tree"""
+ return [ i for i in self if i.type == "tree" ]
+
+ @property
+ def blobs(self):
+ """:return: list(Blob, ...) list of blobs directly below this tree"""
+ return [ i for i in self if i.type == "blob" ]
+
+ @property
+ def cache(self):
+ """
+ :return: An object allowing you to modify the internal cache. This can be used
+ to change the tree's contents. When done, make sure you call ``set_done``
+ on the tree modifier, or serialization behaviour will be incorrect.
+ See the ``TreeModifier`` for more information on how to alter the cache"""
+ return TreeModifier(self._cache)
+
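+ # Usage sketch (hypothetical sha and name): add an entry, then sort before serializing
+ # mod = tree.cache
+ # mod.add(binsha_of_existing_blob, Tree.blob_id << 12 | 0644, 'newfile')
+ # mod.set_done()
+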
+ def traverse( self, predicate = lambda i,d: True,
+ prune = lambda i,d: False, depth = -1, branch_first=True,
+ visit_once = False, ignore_self=1 ):
+ """For documentation, see util.Traversable.traverse
+ Trees are set to visit_once = False to gain more performance in the traversal"""
+ return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
+
+ # List protocol
+ def __getslice__(self, i, j):
+ return list(self._iter_convert_to_object(self._cache[i:j]))
+
+ def __iter__(self):
+ return self._iter_convert_to_object(self._cache)
+
+ def __len__(self):
+ return len(self._cache)
+
+ def __getitem__(self, item):
+ if isinstance(item, int):
+ info = self._cache[item]
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
+
+ if isinstance(item, basestring):
+ # compatibility
+ return self.__div__(item)
+ # END index is basestring
+
+ raise TypeError( "Invalid index type: %r" % item )
+
+
+ def __contains__(self, item):
+ if isinstance(item, IndexObject):
+ for info in self._cache:
+ if item.binsha == info[0]:
+ return True
+ # END compare sha
+ # END for each entry
+ # END handle item is index object
+ # compatibility
+
+ # treat item as repo-relative path
+ path = self.path
+ for info in self._cache:
+ if item == join_path(path, info[2]):
+ return True
+ # END for each item
+ return False
+
+ def __reversed__(self):
+ return reversed(self._iter_convert_to_object(self._cache))
+
+ def _serialize(self, stream):
+ """Serialize this tree into the stream. Please note that we will assume
+ our tree data to be in a sorted state. If this is not the case, serialization
+ will not generate a correct tree representation, as the entries are expected
+ to be sorted by the underlying algorithms"""
+ tree_to_stream(self._cache, stream.write)
+ return self
+
+ def _deserialize(self, stream):
+ self._cache = tree_entries_from_data(stream.read())
+ return self
+
+
+# END tree
+
+# finalize map definition
+Tree._map_id_to_type[Tree.tree_id] = Tree
diff --git a/git/objects/util.py b/git/objects/util.py
new file mode 100644
index 00000000..4c9323b8
--- /dev/null
+++ b/git/objects/util.py
@@ -0,0 +1,315 @@
+# util.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module for general utility functions"""
+from git.util import (
+ IterableList,
+ Actor
+ )
+
+import re
+from collections import deque as Deque
+
+from string import digits
+import time
+import os
+
+__all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date',
+ 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
+ 'verify_utctz', 'Actor')
+
+#{ Functions
+
+def mode_str_to_int(modestr):
+ """
+ :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used
+ :return:
+ Integer identifying a mode compatible with the mode constants of the
+ stat module regarding the rwx permissions for user, group and other,
+ special flags and file system flags, i.e. whether it is a symlink
+ for example."""
+ mode = 0
+ for iteration, char in enumerate(reversed(modestr[-6:])):
+ mode += int(char) << iteration*3
+ # END for each char
+ return mode
+
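+# Example: mode_str_to_int("100644") returns 0100644 (33188), the mode of a
+# regular rw-r--r-- file
+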
+def get_object_type_by_name(object_type_name):
+ """
+ :return: type suitable to handle the given object type name.
+ Use the type to create new instances.
+
+ :param object_type_name: Member of TYPES
+
+ :raise ValueError: In case object_type_name is unknown"""
+ if object_type_name == "commit":
+ import commit
+ return commit.Commit
+ elif object_type_name == "tag":
+ import tag
+ return tag.TagObject
+ elif object_type_name == "blob":
+ import blob
+ return blob.Blob
+ elif object_type_name == "tree":
+ import tree
+ return tree.Tree
+ else:
+ raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
+
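+# Example: get_object_type_by_name("tree") returns the Tree class, so
+# get_object_type_by_name(name)(repo, binsha) instantiates the matching object
+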
+def utctz_to_altz(utctz):
+ """we convert utctz to the timezone in seconds, it is the format time.altzone
+ returns. Git stores it as UTC timezone which has the opposite sign as well,
+ which explains the -1 * ( that was made explicit here )
+ :param utctz: git utc timezone string, i.e. +0200"""
+ return -1 * int(float(utctz)/100*3600)
+
+def altz_to_utctz_str(altz):
+ """As above, but inverses the operation, returning a string that can be used
+ in commit objects"""
+ utci = -1 * int((altz / 3600)*100)
+ utcs = str(abs(utci))
+ utcs = "0"*(4-len(utcs)) + utcs
+ prefix = (utci < 0 and '-') or '+'
+ return prefix + utcs
+
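+# Examples: utctz_to_altz("+0200") == -7200, and altz_to_utctz_str(-7200) == "+0200"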
+
+def verify_utctz(offset):
+ """:raise ValueError: if offset is incorrect
+ :return: offset"""
+ fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
+ if len(offset) != 5:
+ raise fmt_exc
+ if offset[0] not in "+-":
+ raise fmt_exc
+ if offset[1] not in digits or \
+ offset[2] not in digits or \
+ offset[3] not in digits or \
+ offset[4] not in digits:
+ raise fmt_exc
+ # END for each char
+ return offset
+
+def parse_date(string_date):
+ """
+ Parse the given date as one of the following
+
+ * Git internal format: timestamp offset
+ * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200.
+ * ISO 8601 2005-04-07T22:13:13
+ The T can be a space as well
+
+ :return: Tuple(int(timestamp), int(offset)), both in seconds since epoch
+ :raise ValueError: If the format could not be understood
+ :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY"""
+ # git time
+ try:
+ if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
+ timestamp, offset = string_date.split()
+ timestamp = int(timestamp)
+ return timestamp, utctz_to_altz(verify_utctz(offset))
+ else:
+ offset = "+0000" # local time by default
+ if string_date[-5] in '-+':
+ offset = verify_utctz(string_date[-5:])
+ string_date = string_date[:-6] # skip space as well
+ # END split timezone info
+
+ # now figure out the date and time portion - split time
+ date_formats = list()
+ splitter = -1
+ if ',' in string_date:
+ date_formats.append("%a, %d %b %Y")
+ splitter = string_date.rfind(' ')
+ else:
+ # iso plus additional
+ date_formats.append("%Y-%m-%d")
+ date_formats.append("%Y.%m.%d")
+ date_formats.append("%m/%d/%Y")
+ date_formats.append("%d.%m.%Y")
+
+ splitter = string_date.rfind('T')
+ if splitter == -1:
+ splitter = string_date.rfind(' ')
+ # END handle 'T' and ' '
+ # END handle rfc or iso
+
+ assert splitter > -1
+
+ # split date and time
+ time_part = string_date[splitter+1:] # skip space
+ date_part = string_date[:splitter]
+
+ # parse time
+ tstruct = time.strptime(time_part, "%H:%M:%S")
+
+ for fmt in date_formats:
+ try:
+ dtstruct = time.strptime(date_part, fmt)
+ fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
+ tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
+ dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
+ return int(time.mktime(fstruct)), utctz_to_altz(offset)
+ except ValueError:
+ continue
+ # END exception handling
+ # END for each fmt
+
+ # still here ? fail
+ raise ValueError("no format matched")
+ # END handle format
+ except Exception:
+ raise ValueError("Unsupported date format: %s" % string_date)
+ # END handle exceptions
+
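+# Examples (formats without a git-style timestamp use the local timezone):
+# parse_date("1191999972 -0700") == (1191999972, 25200)
+# parse_date("2005-04-07T22:13:13 +0200") -> (<epoch seconds>, -7200)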
+
+# precompiled regex
+_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
+
+def parse_actor_and_date(line):
+ """Parse out the actor (author or committer) info from a line like::
+
+ author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+
+ :return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
+ m = _re_actor_epoch.search(line)
+ actor, epoch, offset = m.groups()
+ return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
+
+
+#} END functions
+
+
+#{ Classes
+
+class ProcessStreamAdapter(object):
+ """Class wireing all calls to the contained Process instance.
+
+ Use this type to hide the underlying process to provide access only to a specified
+ stream. The process is usually wrapped into an AutoInterrupt class to kill
+ it if the instance goes out of scope."""
+ __slots__ = ("_proc", "_stream")
+ def __init__(self, process, stream_name):
+ self._proc = process
+ self._stream = getattr(process, stream_name)
+
+ def __getattr__(self, attr):
+ return getattr(self._stream, attr)
+
+
+class Traversable(object):
+ """Simple interface to perforam depth-first or breadth-first traversals
+ into one direction.
+ Subclasses only need to implement one function.
+ Instances of the Subclass must be hashable"""
+ __slots__ = tuple()
+
+ @classmethod
+ def _get_intermediate_items(cls, item):
+ """
+ Returns:
+ List of items connected to the given item.
+ Must be implemented in subclass
+ """
+ raise NotImplementedError("To be implemented in subclass")
+
+ def list_traverse(self, *args, **kwargs):
+ """
+ :return: IterableList with the results of the traversal as produced by
+ traverse()"""
+ out = IterableList(self._id_attribute_)
+ out.extend(self.traverse(*args, **kwargs))
+ return out
+
+ def traverse( self, predicate = lambda i,d: True,
+ prune = lambda i,d: False, depth = -1, branch_first=True,
+ visit_once = True, ignore_self=1, as_edge = False ):
+ """:return: iterator yieling of items found when traversing self
+
+ :param predicate: f(i,d) returns False if item i at depth d should not be included in the result
+
+ :param prune:
+ f(i,d) return True if the search should stop at item i at depth d.
+ Item i will not be returned.
+
+ :param depth:
+ define at which level the iteration should not go deeper
+ if -1, there is no limit
+ if 0, you would effectively only get self, the root of the iteration
+ i.e. if 1, you would only get the first level of predecessors/successors
+
+ :param branch_first:
+ if True, items will be returned branch first, otherwise depth first
+
+ :param visit_once:
+ if True, items will only be returned once, although they might be encountered
+ several times. Loops are prevented that way.
+
+ :param ignore_self:
+ if True, self will be ignored and automatically pruned from
+ the result. Otherwise it will be the first item to be returned.
+ If as_edge is True, the source of the first edge is None
+
+ :param as_edge:
+ if True, return a pair of items, first being the source, second the
+ destination, i.e. tuple(src, dest) with the edge spanning from
+ source to destination"""
+ visited = set()
+ stack = Deque()
+ stack.append( ( 0 ,self, None ) ) # self is always depth level 0
+
+ def addToStack( stack, item, branch_first, depth ):
+ lst = self._get_intermediate_items( item )
+ if not lst:
+ return
+ if branch_first:
+ stack.extendleft( ( depth , i, item ) for i in lst )
+ else:
+ reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
+ stack.extend( reviter )
+ # END addToStack local method
+
+ while stack:
+ d, item, src = stack.pop() # depth of item, item, item_source
+
+ if visit_once and item in visited:
+ continue
+
+ if visit_once:
+ visited.add(item)
+
+ rval = ( as_edge and (src, item) ) or item
+ if prune( rval, d ):
+ continue
+
+ skipStartItem = ignore_self and ( item is self )
+ if not skipStartItem and predicate( rval, d ):
+ yield rval
+
+ # only continue to next level if this is appropriate !
+ nd = d + 1
+ if depth > -1 and nd > depth:
+ continue
+
+ addToStack( stack, item, branch_first, nd )
+ # END for each item on work stack
+
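+ # Usage sketch (hypothetical): yield only blobs, at most two levels deep
+ # for blob in tree.traverse(predicate=lambda i, d: i.type == 'blob', depth=2):
+ #     print blob.path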
+
+class Serializable(object):
+ """Defines methods to serialize and deserialize objects from and into a data stream"""
+ __slots__ = tuple()
+
+ def _serialize(self, stream):
+ """Serialize the data of this object into the given data stream
+ :note: a serialized object would ``_deserialize`` into the same object
+ :param stream: a file-like object
+ :return: self"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def _deserialize(self, stream):
+ """Deserialize all information regarding this object from the stream
+ :param stream: a file-like object
+ :return: self"""
+ raise NotImplementedError("To be implemented in subclass")