commit: initial version of commit_from_tree which could create commit objects if it could serialize itself

author: Sebastian Thiel <byronimo@gmail.com> 2010-06-02 00:48:16 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-02 01:04:16 +0200
commit: df0892351a394d768489b5647d47b73c24d3ef5f (patch)
tree: fc2cbca826f2b728fe58aa22a09e3a65c12543ae
parent: 6d9b1f4f9fa8c9f030e3207e7deacc5d5f8bba4e (diff)
download: gitpython-df0892351a394d768489b5647d47b73c24d3ef5f.tar.gz
5 files changed, 815 insertions, 613 deletions
diff --git a/CHANGES b/CHANGES
index 5d677b06..e24e723d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,7 +1,11 @@
 =======
 CHANGES
 =======
-  
+
+0.2 Beta 2
+===========
+ * Commit objects now carry the 'encoding' information of their message. It wasn't parsed previously, and defaults to UTF-8
+
 0.2
 =====
 General
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 6a51eed3..bb15192d 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -141,6 +141,7 @@ class Object(LazyMixin):
         self.repo.git.cat_file(self.type, self.sha, output_stream=ostream)
         return self
 
+
 class IndexObject(Object):
     """
     Base for all objects that can be part of the index file , namely Tree, Blob and
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 826f684c..87eed49b 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -7,372 +7,425 @@
 from git.utils import Iterable
 import git.diff as diff
 import git.stats as stats
+from git.actor import Actor
 from tree import Tree
 import base
 import utils
-import tempfile
+import time
 import os
 
 class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
-    """
-    Wraps a git Commit object.
-    
-    This class will act lazily on some of its attributes and will query the 
-    value on demand only if it involves calling the git binary.
-    """
-    
-    # object configuration 
-    type = "commit"
-    __slots__ = ("tree",
-                 "author", "authored_date", "author_tz_offset",
-                 "committer", "committed_date", "committer_tz_offset",
-                 "message", "parents")
-    _id_attribute_ = "sha"
-    
-    def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
-                 committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None):
-        """
-        Instantiate a new Commit. All keyword arguments taking None as default will 
-        be implicitly set if id names a valid sha. 
-        
-        The parameter documentation indicates the type of the argument after a colon ':'.
-
-        ``sha``
-            is the sha id of the commit or a ref
-
-        ``parents`` : tuple( Commit, ... )
-            is a tuple of commit ids or actual Commits
-
-        ``tree`` : Tree
-            is the corresponding tree id or an actual Tree
-
-        ``author`` : Actor
-            is the author string ( will be implicitly converted into an Actor object )
-
-        ``authored_date`` : int_seconds_since_epoch
-            is the authored DateTime - use time.gmtime() to convert it into a 
-            different format
-
-        ``author_tz_offset``: int_seconds_west_of_utc
-           is the timezone that the authored_date is in
-
-        ``committer`` : Actor
-            is the committer string
-
-        ``committed_date`` : int_seconds_since_epoch
-            is the committed DateTime - use time.gmtime() to convert it into a 
-            different format
-
-        ``committer_tz_offset``: int_seconds_west_of_utc
-           is the timezone that the authored_date is in
-
-        ``message`` : string
-            is the commit message
-
-        Returns
-            git.Commit
-        """
-        super(Commit,self).__init__(repo, sha)
-        self._set_self_from_args_(locals())
-
-        if parents is not None:
-            self.parents = tuple( self.__class__(repo, p) for p in parents )
-        # END for each parent to convert
-            
-        if self.sha and tree is not None:
-            self.tree = Tree(repo, tree, path='')
-        # END id to tree conversion
-        
-    @classmethod
-    def _get_intermediate_items(cls, commit):
-        return commit.parents
-
-    def _set_cache_(self, attr):
-        """
-        Called by LazyMixin superclass when the given uninitialized member needs 
-        to be set.
-        We set all values at once.
-        """
-        if attr in Commit.__slots__:
-            # prepare our data lines to match rev-list
-            data_lines = self.data.splitlines()
-            data_lines.insert(0, "commit %s" % self.sha)
-            temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
-            self.parents = temp.parents
-            self.tree = temp.tree
-            self.author = temp.author
-            self.authored_date = temp.authored_date
-            self.author_tz_offset = temp.author_tz_offset
-            self.committer = temp.committer
-            self.committed_date = temp.committed_date
-            self.committer_tz_offset = temp.committer_tz_offset
-            self.message = temp.message
-        else:
-            super(Commit, self)._set_cache_(attr)
-
-    @property
-    def summary(self):
-        """
-        Returns
-            First line of the commit message.
-        """
-        return self.message.split('\n', 1)[0]
-        
-    def count(self, paths='', **kwargs):
-        """
-        Count the number of commits reachable from this commit
-
-        ``paths``
-            is an optinal path or a list of paths restricting the return value 
-            to commits actually containing the paths
-
-        ``kwargs``
-            Additional options to be passed to git-rev-list. They must not alter
-            the ouput style of the command, or parsing will yield incorrect results
-        Returns
-            int
-        """
-        # yes, it makes a difference whether empty paths are given or not in our case
-        # as the empty paths version will ignore merge commits for some reason.
-        if paths:
-            return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
-        else:
-            return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
-        
-
-    @property
-    def name_rev(self):
-        """
-        Returns
-            String describing the commits hex sha based on the closest Reference.
-            Mostly useful for UI purposes
-        """
-        return self.repo.git.name_rev(self)
-
-    @classmethod
-    def iter_items(cls, repo, rev, paths='', **kwargs):
-        """
-        Find all commits matching the given criteria.
-
-        ``repo``
-            is the Repo
-
-        ``rev``
-            revision specifier, see git-rev-parse for viable options
-
-        ``paths``
-            is an optinal path or list of paths, if set only Commits that include the path 
-            or paths will be considered
-
-        ``kwargs``
-            optional keyword arguments to git rev-list where
-            ``max_count`` is the maximum number of commits to fetch
-            ``skip`` is the number of commits to skip
-            ``since`` all commits since i.e. '1970-01-01'
-
-        Returns
-            iterator yielding Commit items
-        """
-        options = {'pretty': 'raw', 'as_process' : True }
-        options.update(kwargs)
-        
-        args = list()
-        if paths:
-            args.extend(('--', paths))
-        # END if paths
-
-        proc = repo.git.rev_list(rev, args, **options)
-        return cls._iter_from_process_or_stream(repo, proc, True)
-        
-    def iter_parents(self, paths='', **kwargs):
-        """
-        Iterate _all_ parents of this commit.
-        
-        ``paths``
-            Optional path or list of paths limiting the Commits to those that 
-            contain at least one of the paths
-        
-        ``kwargs``
-            All arguments allowed by git-rev-list
-            
-        Return:
-            Iterator yielding Commit objects which are parents of self
-        """
-        # skip ourselves
-        skip = kwargs.get("skip", 1)
-        if skip == 0:   # skip ourselves 
-            skip = 1
-        kwargs['skip'] = skip
-        
-        return self.iter_items( self.repo, self, paths, **kwargs )
-
-    @property
-    def stats(self):
-        """
-        Create a git stat from changes between this commit and its first parent 
-        or from all changes done if this is the very first commit.
-        
-        Return
-            git.Stats
-        """
-        if not self.parents:
-            text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
-            text2 = ""
-            for line in text.splitlines()[1:]:
-                (insertions, deletions, filename) = line.split("\t")
-                text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
-            text = text2
-        else:
-            text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
-        return stats.Stats._list_from_string(self.repo, text)
-
-    @classmethod
-    def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
-        """
-        Parse out commit information into a list of Commit objects
-
-        ``repo``
-            is the Repo
-
-        ``proc``
-            git-rev-list process instance (raw format)
-
-        ``from_rev_list``
-            If True, the stream was created by rev-list in which case we parse 
-            the message differently
-        Returns
-            iterator returning Commit objects
-        """
-        stream = proc_or_stream
-        if not hasattr(stream,'next'):
-            stream = proc_or_stream.stdout
-            
-        for line in stream:
-            commit_tokens = line.split() 
-            id = commit_tokens[1]
-            assert commit_tokens[0] == "commit"
-            tree = stream.next().split()[1]
-
-            parents = []
-            next_line = None
-            for parent_line in stream:
-                if not parent_line.startswith('parent'):
-                    next_line = parent_line
-                    break
-                # END abort reading parents
-                parents.append(parent_line.split()[-1])
-            # END for each parent line
-            
-            author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
-            committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
-            
-            # empty line
-            stream.next()
-            
-            message_lines = []
-            if from_rev_list:
-                for msg_line in stream:
-                    if not msg_line.startswith('    '):
-                        # and forget about this empty marker
-                        break
-                    # END abort message reading 
-                    # strip leading 4 spaces
-                    message_lines.append(msg_line[4:])
-                # END while there are message lines
-            else:
-                # a stream from our data simply gives us the plain message
-                for msg_line in stream:
-                    message_lines.append(msg_line)
-            # END message parsing
-            message = '\n'.join(message_lines)
-            
-            yield Commit(repo, id, parents=tuple(parents), tree=tree,
-                         author=author, authored_date=authored_date, author_tz_offset=author_tz_offset,
-                         committer=committer, committed_date=committed_date, committer_tz_offset=committer_tz_offset,
-                         message=message)
-        # END for each line in stream
-        
-        
-    @classmethod
-    def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
-        """
-        Commit the given tree, creating a commit object.
-        
-        ``repo``
-            is the Repo
-            
-        ``tree``
-            Sha of a tree or a tree object to become the tree of the new commit
-        
-        ``message``
-            Commit message. It may be an empty string if no message is provided.
-            It will be converted to a string in any case.
-            
-        ``parent_commits``
-            Optional Commit objects to use as parents for the new commit.
-            If empty list, the commit will have no parents at all and become 
-            a root commit.
-            If None , the current head commit will be the parent of the 
-            new commit object
-            
-        ``head``
-            If True, the HEAD will be advanced to the new commit automatically.
-            Else the HEAD will remain pointing on the previous commit. This could 
-            lead to undesired results when diffing files.
-            
-        Returns
-            Commit object representing the new commit
-            
-        Note:
-            Additional information about hte committer and Author are taken from the
-            environment or from the git configuration, see git-commit-tree for 
-            more information
-        """
-        parents = parent_commits
-        if parent_commits is None:
-            try:
-                parent_commits = [ repo.head.commit ]
-            except ValueError:
-                # empty repositories have no head commit
-                parent_commits = list()
-            # END handle parent commits
-        # END if parent commits are unset
-        
-        parent_args = [ ("-p", str(commit)) for commit in parent_commits ]
-        
-        # create message stream
-        tmp_file_path = tempfile.mktemp()
-        fp = open(tmp_file_path,"wb")
-        fp.write(str(message))
-        fp.close()
-        fp = open(tmp_file_path,"rb")
-        fp.seek(0)
-        
-        try:
-            # write the current index as tree
-            commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp)
-            new_commit = cls(repo, commit_sha)
-            
-            if head:
-                try:
-                    repo.head.commit = new_commit
-                except ValueError:
-                    # head is not yet set to the ref our HEAD points to.
-                    import git.refs
-                    master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
-                    repo.head.reference = master
-                # END handle empty repositories
-            # END advance head handling 
-            
-            return new_commit
-        finally:
-            fp.close()
-            os.remove(tmp_file_path)
-        
-    def __str__(self):
-        """ Convert commit to string which is SHA1 """
-        return self.sha
-
-    def __repr__(self):
-        return '<git.Commit "%s">' % self.sha
+	"""
+	Wraps a git Commit object.
+	
+	This class will act lazily on some of its attributes and will query the 
+	value on demand only if it involves calling the git binary.
+	"""
+	
+	# ENVIRONMENT VARIABLES
+	# read when creating new commits
+	env_author_name = "GIT_AUTHOR_NAME"
+	env_author_email = "GIT_AUTHOR_EMAIL"
+	env_author_date = "GIT_AUTHOR_DATE"
+	env_committer_name = "GIT_COMMITTER_NAME"
+	env_committer_email = "GIT_COMMITTER_EMAIL"
+	env_committer_date = "GIT_COMMITTER_DATE"
+	env_email = "EMAIL"
+	
+	# CONFIGURATION KEYS
+	conf_email = 'email'
+	conf_name = 'name'
+	conf_encoding = 'i18n.commitencoding'
+	
+	# INVARIANTS
+	default_encoding = "UTF-8"
+	
+	
+	# object configuration 
+	type = "commit"
+	__slots__ = ("tree",
+				 "author", "authored_date", "author_tz_offset",
+				 "committer", "committed_date", "committer_tz_offset",
+				 "message", "parents", "encoding")
+	_id_attribute_ = "sha"
+	
+	def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+				 committer=None, committed_date=None, committer_tz_offset=None, 
+				 message=None,  parents=None, encoding=None):
+		"""
+		Instantiate a new Commit. All keyword arguments taking None as default will 
+		be implicitly set if id names a valid sha. 
+		
+		The parameter documentation indicates the type of the argument after a colon ':'.
+
+		:param sha: is the sha id of the commit or a ref
+		:param parents: tuple( Commit, ... ) 
+			is a tuple of commit ids or actual Commits
+		:param tree: Tree
+			is the corresponding tree id or an actual Tree
+		:param author: Actor
+			is the author string ( will be implicitly converted into an Actor object )
+		:param authored_date: int_seconds_since_epoch
+			is the authored DateTime - use time.gmtime() to convert it into a 
+			different format
+		:param author_tz_offset: int_seconds_west_of_utc
+			is the timezone that the authored_date is in
+		:param committer: Actor
+			is the committer string
+		:param committed_date: int_seconds_since_epoch
+			is the committed DateTime - use time.gmtime() to convert it into a 
+			different format
+		:param committer_tz_offset: int_seconds_west_of_utc
+			is the timezone that the committed_date is in
+		:param message: string
+			is the commit message
+		:param encoding: string
+			encoding of the message, defaults to UTF-8
+		:return: git.Commit
+		
+		:note: Timezone information is in the same format and in the same sign 
+			as what time.altzone returns. The sign is inverted compared to git's 
+			UTC timezone.
+		"""
+		super(Commit,self).__init__(repo, sha)
+		self._set_self_from_args_(locals())
+
+		if parents is not None:
+			self.parents = tuple( self.__class__(repo, p) for p in parents )
+		# END for each parent to convert
+			
+		if self.sha and tree is not None:
+			self.tree = Tree(repo, tree, path='')
+		# END id to tree conversion
+		
+	@classmethod
+	def _get_intermediate_items(cls, commit):
+		return commit.parents
+
+	def _set_cache_(self, attr):
+		"""
+		Called by LazyMixin superclass when the given uninitialized member needs 
+		to be set.
+		We set all values at once.
+		"""
+		if attr in Commit.__slots__:
+			# prepare our data lines to match rev-list
+			data_lines = self.data.splitlines()
+			data_lines.insert(0, "commit %s" % self.sha)
+			temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
+			self.parents = temp.parents
+			self.tree = temp.tree
+			self.author = temp.author
+			self.authored_date = temp.authored_date
+			self.author_tz_offset = temp.author_tz_offset
+			self.committer = temp.committer
+			self.committed_date = temp.committed_date
+			self.committer_tz_offset = temp.committer_tz_offset
+			self.message = temp.message
+			self.encoding = temp.encoding
+		else:
+			super(Commit, self)._set_cache_(attr)
+
+	@property
+	def summary(self):
+		"""
+		Returns
+			First line of the commit message.
+		"""
+		return self.message.split('\n', 1)[0]
+		
+	def count(self, paths='', **kwargs):
+		"""
+		Count the number of commits reachable from this commit
+
+		``paths``
+			is an optional path or a list of paths restricting the return value 
+			to commits actually containing the paths
+
+		``kwargs``
+			Additional options to be passed to git-rev-list. They must not alter
+			the output style of the command, or parsing will yield incorrect results
+		Returns
+			int
+		"""
+		# yes, it makes a difference whether empty paths are given or not in our case
+		# as the empty paths version will ignore merge commits for some reason.
+		if paths:
+			return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
+		else:
+			return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
+		
+
+	@property
+	def name_rev(self):
+		"""
+		Returns
+			String describing the commits hex sha based on the closest Reference.
+			Mostly useful for UI purposes
+		"""
+		return self.repo.git.name_rev(self)
+
+	@classmethod
+	def iter_items(cls, repo, rev, paths='', **kwargs):
+		"""
+		Find all commits matching the given criteria.
+
+		``repo``
+			is the Repo
+
+		``rev``
+			revision specifier, see git-rev-parse for viable options
+
+		``paths``
+			is an optional path or list of paths, if set only Commits that include the path 
+			or paths will be considered
+
+		``kwargs``
+			optional keyword arguments to git rev-list where
+			``max_count`` is the maximum number of commits to fetch
+			``skip`` is the number of commits to skip
+			``since`` all commits since i.e. '1970-01-01'
+
+		Returns
+			iterator yielding Commit items
+		"""
+		options = {'pretty': 'raw', 'as_process' : True }
+		options.update(kwargs)
+		
+		args = list()
+		if paths:
+			args.extend(('--', paths))
+		# END if paths
+
+		proc = repo.git.rev_list(rev, args, **options)
+		return cls._iter_from_process_or_stream(repo, proc, True)
+		
+	def iter_parents(self, paths='', **kwargs):
+		"""
+		Iterate _all_ parents of this commit.
+		
+		``paths``
+			Optional path or list of paths limiting the Commits to those that 
+			contain at least one of the paths
+		
+		``kwargs``
+			All arguments allowed by git-rev-list
+			
+		Return:
+			Iterator yielding Commit objects which are parents of self
+		"""
+		# skip ourselves
+		skip = kwargs.get("skip", 1)
+		if skip == 0:	# skip ourselves 
+			skip = 1
+		kwargs['skip'] = skip
+		
+		return self.iter_items( self.repo, self, paths, **kwargs )
+
+	@property
+	def stats(self):
+		"""
+		Create a git stat from changes between this commit and its first parent 
+		or from all changes done if this is the very first commit.
+		
+		Return
+			git.Stats
+		"""
+		if not self.parents:
+			text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
+			text2 = ""
+			for line in text.splitlines()[1:]:
+				(insertions, deletions, filename) = line.split("\t")
+				text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
+			text = text2
+		else:
+			text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
+		return stats.Stats._list_from_string(self.repo, text)
+
+	@classmethod
+	def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
+		"""
+		Parse out commit information into a list of Commit objects
+
+		``repo``
+			is the Repo
+
+		``proc``
+			git-rev-list process instance (raw format)
+
+		``from_rev_list``
+			If True, the stream was created by rev-list in which case we parse 
+			the message differently
+		Returns
+			iterator returning Commit objects
+		"""
+		stream = proc_or_stream
+		if not hasattr(stream,'next'):
+			stream = proc_or_stream.stdout
+			
+		for line in stream:
+			commit_tokens = line.split() 
+			id = commit_tokens[1]
+			assert commit_tokens[0] == "commit"
+			tree = stream.next().split()[1]
+
+			parents = []
+			next_line = None
+			for parent_line in stream:
+				if not parent_line.startswith('parent'):
+					next_line = parent_line
+					break
+				# END abort reading parents
+				parents.append(parent_line.split()[-1])
+			# END for each parent line
+			
+			author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
+			committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
+			
+			
+			# either an optional "encoding <name>" header line or the empty separator line
+			encoding = stream.next()
+			encoding.strip()
+			if encoding:
+				encoding = encoding[encoding.find(' ')+1:]
+			# END parse encoding
+			
+			message_lines = list()
+			if from_rev_list:
+				for msg_line in stream:
+					if not msg_line.startswith('    '):
+						# and forget about this empty marker
+						break
+					# END abort message reading 
+					# strip leading 4 spaces
+					message_lines.append(msg_line[4:])
+				# END while there are message lines
+			else:
+				# a stream from our data simply gives us the plain message
+				for msg_line in stream:
+					message_lines.append(msg_line)
+			# END message parsing
+			message = '\n'.join(message_lines)
+			
+			
+			yield Commit(repo, id, tree,  
+						 author, authored_date, author_tz_offset,
+						 committer, committed_date, committer_tz_offset,
+						 message, tuple(parents), 
+						 encoding or cls.default_encoding)
+		# END for each line in stream
+		
+		
+	@classmethod
+	def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
+		"""Commit the given tree, creating a commit object.
+		
+		:param repo: Repo object the commit should be part of 
+		:param tree: Sha of a tree or a tree object to become the tree of the new commit
+		:param message: Commit message. It may be an empty string if no message is provided.
+			It will be converted to a string in any case.
+		:param parent_commits:
+			Optional Commit objects to use as parents for the new commit.
+			If empty list, the commit will have no parents at all and become 
+			a root commit.
+			If None , the current head commit will be the parent of the 
+			new commit object
+		:param head:
+			If True, the HEAD will be advanced to the new commit automatically.
+			Else the HEAD will remain pointing on the previous commit. This could 
+			lead to undesired results when diffing files.
+			
+		:return: Commit object representing the new commit
+			
+		:note:
+			Additional information about the committer and Author are taken from the
+			environment or from the git configuration, see git-commit-tree for 
+			more information
+		"""
+		parents = parent_commits
+		if parent_commits is None:
+			try:
+				parent_commits = [ repo.head.commit ]
+			except ValueError:
+				# empty repositories have no head commit
+				parent_commits = list()
+			# END handle parent commits
+		# END if parent commits are unset
+		
+		# retrieve all additional information, create a commit object, and 
+		# serialize it
+		# Generally: 
+		# * Environment variables override configuration values
+		# * Sensible defaults are set according to the git documentation
+		
+		# COMMITTER AND AUTHOR INFO
+		cr = repo.config_reader()
+		env = os.environ
+		default_email = utils.get_user_id()
+		default_name = default_email.split('@')[0]
+		
+		conf_name = cr.get_value('user', cls.conf_name, default_name)
+		conf_email = cr.get_value('user', cls.conf_email, default_email)
+		
+		author_name = env.get(cls.env_author_name, conf_name)
+		author_email = env.get(cls.env_author_email, default_email)
+		
+		committer_name = env.get(cls.env_committer_name, conf_name)
+		committer_email = env.get(cls.env_committer_email, conf_email)
+		
+		# PARSE THE DATES
+		unix_time = int(time.time())
+		offset = time.altzone
+		
+		author_date_str = env.get(cls.env_author_date, '')
+		if author_date_str:
+			author_time, author_offset = utils.parse_date(author_date_str)
+		else:
+			author_time, author_offset = unix_time, offset
+		# END set author time
+		
+		committer_date_str = env.get(cls.env_committer_date, '')
+		if committer_date_str: 
+			committer_time, committer_offset = utils.parse_date(committer_date_str)
+		else:
+			committer_time, committer_offset = unix_time, offset
+		# END set committer time
+		
+		# assume utf8 encoding
+		enc_section, enc_option = cls.conf_encoding.split('.')
+		conf_encoding = cr.get_value(enc_section, enc_option, default_encoding)
+		
+		author = Actor(author_name, author_email)
+		committer = Actor(committer_name, committer_email)
+		
+		
+		# CREATE NEW COMMIT
+		new_commit = cls(repo, cls.NULL_HEX_SHA, tree, 
+						author, author_time, author_offset, 
+						committer, committer_time, committer_offset,
+						message, parent_commits, conf_encoding)
+		
+		# serialize !
+		
+		if head:
+			try:
+				repo.head.commit = new_commit
+			except ValueError:
+				# head is not yet set to the ref our HEAD points to
+				# Happens on first commit
+				import git.refs
+				master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
+				repo.head.reference = master
+			# END handle empty repositories
+		# END advance head handling 
+		
+		return new_commit
+	
+		
+	def __str__(self):
+		""" Convert commit to string which is SHA1 """
+		return self.sha
+
+	def __repr__(self):
+		return '<git.Commit "%s">' % self.sha
 
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 4f17b652..7060e293 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -9,159 +9,274 @@ Module for general utility functions
 import re
 from collections import deque as Deque
 from git.actor import Actor
+import platform
+
+from string import digits
+import time
+import os
+
+__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date', 
+			'ProcessStreamAdapter', 'Traversable')
 
 def get_object_type_by_name(object_type_name):
-    """
-    Returns
-        type suitable to handle the given object type name.
-        Use the type to create new instances.
-        
-    ``object_type_name``
-        Member of TYPES
-        
-    Raises
-        ValueError: In case object_type_name is unknown
-    """
-    if object_type_name == "commit":
-        import commit
-        return commit.Commit
-    elif object_type_name == "tag":
-        import tag
-        return tag.TagObject
-    elif object_type_name == "blob":
-        import blob
-        return blob.Blob
-    elif object_type_name == "tree":
-        import tree
-        return tree.Tree
-    else:
-        raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
-        
-    
+	"""
+	Returns
+		type suitable to handle the given object type name.
+		Use the type to create new instances.
+		
+	``object_type_name``
+		Member of TYPES
+		
+	Raises
+		ValueError: In case object_type_name is unknown
+	"""
+	if object_type_name == "commit":
+		import commit
+		return commit.Commit
+	elif object_type_name == "tag":
+		import tag
+		return tag.TagObject
+	elif object_type_name == "blob":
+		import blob
+		return blob.Blob
+	elif object_type_name == "tree":
+		import tree
+		return tree.Tree
+	else:
+		raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
+		
+
+def get_user_id():
+	""":return: string identifying the currently active system user as name@node
+	:note: user can be set with the 'USER' environment variable, usually set on windows"""
+	ukn = 'UNKNOWN'
+	username = os.environ.get('USER', ukn)
+	if username == ukn and hasattr(os, 'getlogin'):
+		username = os.getlogin()
+	# END get username from login
+	return "%s@%s" % (username, platform.node())
+		
+
+def _utc_tz_to_altz(utctz):
+	"""we convert utctz to the timezone in seconds, it is the format time.altzone
+	returns. Git stores it as a UTC timezone, which has the opposite sign as well,
+	which explains the -1 * ( that was made explicit here )
+	:param utctz: git utc timezone string, i.e. +0200"""
+	return -1 * int(float(utctz)/100*3600)
+
+def _verify_utctz(offset):
+	""":raise ValueError: if offset is incorrect
+	:return: offset"""
+	fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
+	if len(offset) != 5:
+		raise fmt_exc
+	if offset[0] not in "+-":
+		raise fmt_exc
+	if	offset[1] not in digits or \
+		offset[2] not in digits or \
+		offset[3] not in digits or \
+		offset[4] not in digits:
+		raise fmt_exc
+	# END for each char
+	return offset
+
+def parse_date(string_date):
+	"""
+	Parse the given date as one of the following
+		* Git internal format: timestamp offset
+		* RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. 
+		* ISO 8601 2005-04-07T22:13:13
+		 The T can be a space as well
+		 
+	:return: Tuple(int(timestamp), int(offset)), timestamp in seconds since epoch, offset in seconds
+	:raise ValueError: If the format could not be understood
+	:note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY 
+	"""
+	# git time
+	try:
+		if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
+			timestamp, offset = string_date.split()
+			timestamp = int(timestamp)
+			return timestamp, _utc_tz_to_altz(_verify_utctz(offset))
+		else:
+			offset = "+0000"					# local time by default
+			if string_date[-5] in '-+':
+				offset = _verify_utctz(string_date[-5:])
+				string_date = string_date[:-6]	# skip space as well
+			# END split timezone info
+			
+			# now figure out the date and time portion - split time
+			date_formats = list()
+			splitter = -1
+			if ',' in string_date:
+				date_formats.append("%a, %d %b %Y")
+				splitter = string_date.rfind(' ')
+			else:
+				# iso plus additional
+				date_formats.append("%Y-%m-%d")
+				date_formats.append("%Y.%m.%d")
+				date_formats.append("%m/%d/%Y")
+				date_formats.append("%d.%m.%Y")
+				
+				splitter = string_date.rfind('T')
+				if splitter == -1:
+					splitter = string_date.rfind(' ')
+				# END handle 'T' and ' '
+			# END handle rfc or iso 
+			
+			assert splitter > -1
+			
+			# split date and time
+			time_part = string_date[splitter+1:]	# skip space
+			date_part = string_date[:splitter]
+			
+			# parse time
+			tstruct = time.strptime(time_part, "%H:%M:%S")
+			
+			for fmt in date_formats:
+				try:
+					dtstruct = time.strptime(date_part, fmt)
+					fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, 
+												tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
+												dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
+					return int(time.mktime(fstruct)), _utc_tz_to_altz(offset)
+				except ValueError:
+					continue
+				# END exception handling
+			# END for each fmt
+			
+			# still here ? fail
+			raise ValueError("no format matched")
+		# END handle format
+	except Exception:
+		raise ValueError("Unsupported date format: %s" % string_date)  
+	# END handle exceptions
+
+	
 # precompiled regex
 _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
 
 def parse_actor_and_date(line):
-    """
-    Parse out the actor (author or committer) info from a line like::
-    
-     author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-    
-    Returns
-        [Actor, int_seconds_since_epoch, int_timezone_offset]
-    """
-    m = _re_actor_epoch.search(line)
-    actor, epoch, offset = m.groups()
-    return (Actor._from_string(actor), int(epoch), -int(float(offset)/100*3600))
-    
-    
-    
+	"""
+	Parse out the actor (author or committer) info from a line like::
+	
+	 author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+	
+	Returns
+		[Actor, int_seconds_since_epoch, int_timezone_offset]
+	"""
+	m = _re_actor_epoch.search(line)
+	actor, epoch, offset = m.groups()
+	return (Actor._from_string(actor), int(epoch), _utc_tz_to_altz(offset))
+	
+	
+	
 class ProcessStreamAdapter(object):
-    """
-    Class wireing all calls to the contained Process instance.
-    
-    Use this type to hide the underlying process to provide access only to a specified 
-    stream. The process is usually wrapped into an AutoInterrupt class to kill 
-    it if the instance goes out of scope.
-    """
-    __slots__ = ("_proc", "_stream")
-    def __init__(self, process, stream_name):
-        self._proc = process
-        self._stream = getattr(process, stream_name)
-    
-    def __getattr__(self, attr):
-        return getattr(self._stream, attr)
-        
-        
+	"""
+	Class wiring all calls to the contained Process instance.
+	
+	Use this type to hide the underlying process to provide access only to a specified 
+	stream. The process is usually wrapped into an AutoInterrupt class to kill 
+	it if the instance goes out of scope.
+	"""
+	__slots__ = ("_proc", "_stream")
+	def __init__(self, process, stream_name):
+		self._proc = process
+		self._stream = getattr(process, stream_name)
+	
+	def __getattr__(self, attr):
+		return getattr(self._stream, attr)
+		
+		
 class Traversable(object):
-    """Simple interface to perforam depth-first or breadth-first traversals 
-    into one direction.
-    Subclasses only need to implement one function.
-    Instances of the Subclass must be hashable"""
-    __slots__ = tuple()
-    
-    @classmethod
-    def _get_intermediate_items(cls, item):
-        """
-        Returns:
-            List of items connected to the given item.
-            Must be implemented in subclass
-        """
-        raise NotImplementedError("To be implemented in subclass")
-            
-    
-    def traverse( self, predicate = lambda i,d: True,
-                           prune = lambda i,d: False, depth = -1, branch_first=True,
-                           visit_once = True, ignore_self=1, as_edge = False ):
-        """
-        ``Returns``
-            iterator yieling of items found when traversing self
-            
-        ``predicate``
-            f(i,d) returns False if item i at depth d should not be included in the result
-            
-        ``prune``
-            f(i,d) return True if the search should stop at item i at depth d.
-            Item i will not be returned.
-            
-        ``depth``
-            define at which level the iteration should not go deeper
-            if -1, there is no limit
-            if 0, you would effectively only get self, the root of the iteration
-            i.e. if 1, you would only get the first level of predessessors/successors
-            
-        ``branch_first``
-            if True, items will be returned branch first, otherwise depth first
-            
-        ``visit_once``
-            if True, items will only be returned once, although they might be encountered
-            several times. Loops are prevented that way.
-        
-        ``ignore_self``
-            if True, self will be ignored and automatically pruned from
-            the result. Otherwise it will be the first item to be returned.
-            If as_edge is True, the source of the first edge is None
-            
-        ``as_edge``
-            if True, return a pair of items, first being the source, second the 
-            destinatination, i.e. tuple(src, dest) with the edge spanning from 
-            source to destination"""
-        visited = set()
-        stack = Deque()
-        stack.append( ( 0 ,self, None ) )       # self is always depth level 0
-    
-        def addToStack( stack, item, branch_first, depth ):
-            lst = self._get_intermediate_items( item )
-            if not lst:
-                return
-            if branch_first:
-                stack.extendleft( ( depth , i, item ) for i in lst )
-            else:
-                reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
-                stack.extend( reviter )
-        # END addToStack local method
-    
-        while stack:
-            d, item, src = stack.pop()          # depth of item, item, item_source
-            
-            if visit_once and item in visited:
-                continue
-                
-            if visit_once:
-                visited.add(item)
-            
-            rval = ( as_edge and (src, item) ) or item
-            if prune( rval, d ):
-                continue
-    
-            skipStartItem = ignore_self and ( item == self )
-            if not skipStartItem and predicate( rval, d ):
-                yield rval
-    
-            # only continue to next level if this is appropriate !
-            nd = d + 1
-            if depth > -1 and nd > depth:
-                continue
-    
-            addToStack( stack, item, branch_first, nd )
-        # END for each item on work stack
+	"""Simple interface to perform depth-first or breadth-first traversals
+	in one direction.
+	Subclasses only need to implement one function.
+	Instances of the Subclass must be hashable"""
+	__slots__ = tuple()
+	
+	@classmethod
+	def _get_intermediate_items(cls, item):
+		"""
+		Returns:
+			List of items connected to the given item.
+			Must be implemented in subclass
+		"""
+		raise NotImplementedError("To be implemented in subclass")
+			
+	
+	def traverse( self, predicate = lambda i,d: True,
+						   prune = lambda i,d: False, depth = -1, branch_first=True,
+						   visit_once = True, ignore_self=1, as_edge = False ):
+		"""
+		``Returns``
+			iterator yielding the items found when traversing self
+			
+		``predicate``
+			f(i,d) returns False if item i at depth d should not be included in the result
+			
+		``prune``
+			f(i,d) return True if the search should stop at item i at depth d.
+			Item i will not be returned.
+			
+		``depth``
+			define at which level the iteration should not go deeper
+			if -1, there is no limit
+			if 0, you would effectively only get self, the root of the iteration
+			i.e. if 1, you would only get the first level of predecessors/successors
+			
+		``branch_first``
+			if True, items will be returned branch first, otherwise depth first
+			
+		``visit_once``
+			if True, items will only be returned once, although they might be encountered
+			several times. Loops are prevented that way.
+		
+		``ignore_self``
+			if True, self will be ignored and automatically pruned from
+			the result. Otherwise it will be the first item to be returned.
+			If as_edge is True, the source of the first edge is None
+			
+		``as_edge``
+			if True, return a pair of items, first being the source, second the 
+			destination, i.e. tuple(src, dest) with the edge spanning from
+			source to destination"""
+		visited = set()
+		stack = Deque()
+		stack.append( ( 0 ,self, None ) )		# self is always depth level 0
+	
+		def addToStack( stack, item, branch_first, depth ):
+			lst = self._get_intermediate_items( item )
+			if not lst:
+				return
+			if branch_first:
+				stack.extendleft( ( depth , i, item ) for i in lst )
+			else:
+				reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
+				stack.extend( reviter )
+		# END addToStack local method
+	
+		while stack:
+			d, item, src = stack.pop()			# depth of item, item, item_source
+			
+			if visit_once and item in visited:
+				continue
+				
+			if visit_once:
+				visited.add(item)
+			
+			rval = ( as_edge and (src, item) ) or item
+			if prune( rval, d ):
+				continue
+	
+			skipStartItem = ignore_self and ( item == self )
+			if not skipStartItem and predicate( rval, d ):
+				yield rval
+	
+			# only continue to next level if this is appropriate !
+			nd = d + 1
+			if depth > -1 and nd > depth:
+				continue
+	
+			addToStack( stack, item, branch_first, nd )
+		# END for each item on work stack
diff --git a/test/git/test_utils.py b/test/git/test_utils.py
index f843c12e..2c3c392b 100644
--- a/test/git/test_utils.py
+++ b/test/git/test_utils.py
@@ -9,112 +9,141 @@ import tempfile
 
 from test.testlib import *
 from git.utils import *
+from git.objects.utils import *
 from git import *
 from git.cmd import dashify
 import time
 
 
 class TestUtils(TestCase):
-    def setup(self):
-        self.testdict = {
-            "string":   "42",
-            "int":      42,
-            "array":    [ 42 ],
-        }
+	def setup(self):
+		self.testdict = {
+			"string":	"42",
+			"int":		42,
+			"array":	[ 42 ],
+		}
 
-    def test_it_should_dashify(self):
-        assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
-        assert_equal('foo', dashify('foo'))
-        
-        
-    def test_lock_file(self):
-        my_file = tempfile.mktemp()
-        lock_file = LockFile(my_file)
-        assert not lock_file._has_lock()
-        # release lock we don't have  - fine
-        lock_file._release_lock()
-        
-        # get lock
-        lock_file._obtain_lock_or_raise()
-        assert lock_file._has_lock()
-        
-        # concurrent access
-        other_lock_file = LockFile(my_file)
-        assert not other_lock_file._has_lock()
-        self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise)
-        
-        lock_file._release_lock()
-        assert not lock_file._has_lock()
-        
-        other_lock_file._obtain_lock_or_raise()
-        self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise)
-        
-        # auto-release on destruction
-        del(other_lock_file)
-        lock_file._obtain_lock_or_raise()
-        lock_file._release_lock()
-        
-    def test_blocking_lock_file(self):
-        my_file = tempfile.mktemp()
-        lock_file = BlockingLockFile(my_file)
-        lock_file._obtain_lock()
-        
-        # next one waits for the lock
-        start = time.time()
-        wait_time = 0.1
-        wait_lock = BlockingLockFile(my_file, 0.05, wait_time)
-        self.failUnlessRaises(IOError, wait_lock._obtain_lock)
-        elapsed = time.time() - start
-        assert elapsed <= wait_time + 0.02  # some extra time it may cost
-        
-    def _cmp_contents(self, file_path, data):
-        # raise if data from file at file_path 
-        # does not match data string
-        fp = open(file_path, "rb")
-        try:
-            assert fp.read() == data
-        finally:
-            fp.close()
-        
-    def test_safe_operation(self):
-        my_file = tempfile.mktemp()
-        orig_data = "hello"
-        new_data = "world"
-        my_file_fp = open(my_file, "wb")
-        my_file_fp.write(orig_data)
-        my_file_fp.close()
-        
-        try:
-            cwrite = ConcurrentWriteOperation(my_file)
-            
-            # didn't start writing, doesnt matter
-            cwrite._end_writing(False)
-            cwrite._end_writing(True)
-            assert not cwrite._is_writing()
-            
-            # write data and fail
-            stream = cwrite._begin_writing()
-            assert cwrite._is_writing()
-            stream.write(new_data)
-            cwrite._end_writing(successful=False)
-            self._cmp_contents(my_file, orig_data)
-            assert not os.path.exists(stream.name)
-            
-            # write data - concurrently
-            ocwrite = ConcurrentWriteOperation(my_file)
-            stream = cwrite._begin_writing()
-            self.failUnlessRaises(IOError, ocwrite._begin_writing)
-            
-            stream.write("world")
-            cwrite._end_writing(successful=True)
-            self._cmp_contents(my_file, new_data)
-            assert not os.path.exists(stream.name)
-                
-            # could test automatic _end_writing on destruction
-        finally:
-            os.remove(my_file)
-        # END final cleanup
-        
-        
-        
-        
+	def test_it_should_dashify(self):
+		assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
+		assert_equal('foo', dashify('foo'))
+		
+		
+	def test_lock_file(self):
+		my_file = tempfile.mktemp()
+		lock_file = LockFile(my_file)
+		assert not lock_file._has_lock()
+		# release lock we don't have  - fine
+		lock_file._release_lock()
+		
+		# get lock
+		lock_file._obtain_lock_or_raise()
+		assert lock_file._has_lock()
+		
+		# concurrent access
+		other_lock_file = LockFile(my_file)
+		assert not other_lock_file._has_lock()
+		self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise)
+		
+		lock_file._release_lock()
+		assert not lock_file._has_lock()
+		
+		other_lock_file._obtain_lock_or_raise()
+		self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise)
+		
+		# auto-release on destruction
+		del(other_lock_file)
+		lock_file._obtain_lock_or_raise()
+		lock_file._release_lock()
+		
+	def test_blocking_lock_file(self):
+		my_file = tempfile.mktemp()
+		lock_file = BlockingLockFile(my_file)
+		lock_file._obtain_lock()
+		
+		# next one waits for the lock
+		start = time.time()
+		wait_time = 0.1
+		wait_lock = BlockingLockFile(my_file, 0.05, wait_time)
+		self.failUnlessRaises(IOError, wait_lock._obtain_lock)
+		elapsed = time.time() - start
+		assert elapsed <= wait_time + 0.02	# some extra time it may cost
+		
+	def _cmp_contents(self, file_path, data):
+		# raise if data from file at file_path 
+		# does not match data string
+		fp = open(file_path, "rb")
+		try:
+			assert fp.read() == data
+		finally:
+			fp.close()
+		
+	def test_safe_operation(self):
+		my_file = tempfile.mktemp()
+		orig_data = "hello"
+		new_data = "world"
+		my_file_fp = open(my_file, "wb")
+		my_file_fp.write(orig_data)
+		my_file_fp.close()
+		
+		try:
+			cwrite = ConcurrentWriteOperation(my_file)
+			
+			# didn't start writing, doesn't matter
+			cwrite._end_writing(False)
+			cwrite._end_writing(True)
+			assert not cwrite._is_writing()
+			
+			# write data and fail
+			stream = cwrite._begin_writing()
+			assert cwrite._is_writing()
+			stream.write(new_data)
+			cwrite._end_writing(successful=False)
+			self._cmp_contents(my_file, orig_data)
+			assert not os.path.exists(stream.name)
+			
+			# write data - concurrently
+			ocwrite = ConcurrentWriteOperation(my_file)
+			stream = cwrite._begin_writing()
+			self.failUnlessRaises(IOError, ocwrite._begin_writing)
+			
+			stream.write("world")
+			cwrite._end_writing(successful=True)
+			self._cmp_contents(my_file, new_data)
+			assert not os.path.exists(stream.name)
+				
+			# could test automatic _end_writing on destruction
+		finally:
+			os.remove(my_file)
+		# END final cleanup
+		
+	  
+		
+	def test_user_id(self):
+		assert '@' in get_user_id()
+		
+	def test_parse_date(self):
+		# test all supported formats
+		def assert_rval(rval, veri_time, offset=0):
+			assert len(rval) == 2
+			assert isinstance(rval[0], int) and isinstance(rval[1], int)
+			assert rval[0] == veri_time
+			assert rval[1] == offset 
+		# END assert rval utility
+		
+		rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0)
+		iso = ("2005-04-07T22:13:11 -0200", 7200)
+		iso2 = ("2005-04-07 22:13:11 +0400", -14400)
+		iso3 = ("2005.04.07 22:13:11 -0000", 0)
+		alt = ("04/07/2005 22:13:11", 0)
+		alt2 = ("07.04.2005 22:13:11", 0)
+		veri_time = 1112904791		# the time this represents
+		for date, offset in (rfc, iso, iso2, iso3, alt, alt2):
+			assert_rval(parse_date(date), veri_time, offset)
+		# END for each date type
+		
+		# and failure
+		self.failUnlessRaises(ValueError, parse_date, 'invalid format')
+		self.failUnlessRaises(ValueError, parse_date, '123456789 -02000')
+		self.failUnlessRaises(ValueError, parse_date, ' 123456789 -0200')
+		
+
author	Sebastian Thiel <byronimo@gmail.com>	2010-06-02 00:48:16 +0200
committer	Sebastian Thiel <byronimo@gmail.com>	2010-06-02 01:04:16 +0200
commit	df0892351a394d768489b5647d47b73c24d3ef5f (patch)
tree	fc2cbca826f2b728fe58aa22a09e3a65c12543ae
parent	6d9b1f4f9fa8c9f030e3207e7deacc5d5f8bba4e (diff)
download	gitpython-df0892351a394d768489b5647d47b73c24d3ef5f.tar.gz