summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2010-06-02 00:48:16 +0200
committerSebastian Thiel <byronimo@gmail.com>2010-06-02 01:04:16 +0200
commitdf0892351a394d768489b5647d47b73c24d3ef5f (patch)
treefc2cbca826f2b728fe58aa22a09e3a65c12543ae
parent6d9b1f4f9fa8c9f030e3207e7deacc5d5f8bba4e (diff)
downloadgitpython-df0892351a394d768489b5647d47b73c24d3ef5f.tar.gz
commit: initial version of commit_from_tree which could create commit objects if it could serialize itself
-rw-r--r--CHANGES6
-rw-r--r--lib/git/objects/base.py1
-rw-r--r--lib/git/objects/commit.py777
-rw-r--r--lib/git/objects/utils.py411
-rw-r--r--test/git/test_utils.py233
5 files changed, 815 insertions, 613 deletions
diff --git a/CHANGES b/CHANGES
index 5d677b06..e24e723d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,7 +1,11 @@
=======
CHANGES
=======
-
+
+0.2 Beta 2
+===========
+ * Commit objects now carry the 'encoding' information of their message. It wasn't parsed previously, and defaults to UTF-8
+
0.2
=====
General
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 6a51eed3..bb15192d 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -141,6 +141,7 @@ class Object(LazyMixin):
self.repo.git.cat_file(self.type, self.sha, output_stream=ostream)
return self
+
class IndexObject(Object):
"""
Base for all objects that can be part of the index file , namely Tree, Blob and
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 826f684c..87eed49b 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -7,372 +7,425 @@
from git.utils import Iterable
import git.diff as diff
import git.stats as stats
+from git.actor import Actor
from tree import Tree
import base
import utils
-import tempfile
+import time
import os
class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
- """
- Wraps a git Commit object.
-
- This class will act lazily on some of its attributes and will query the
- value on demand only if it involves calling the git binary.
- """
-
- # object configuration
- type = "commit"
- __slots__ = ("tree",
- "author", "authored_date", "author_tz_offset",
- "committer", "committed_date", "committer_tz_offset",
- "message", "parents")
- _id_attribute_ = "sha"
-
- def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
- committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None):
- """
- Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set if id names a valid sha.
-
- The parameter documentation indicates the type of the argument after a colon ':'.
-
- ``sha``
- is the sha id of the commit or a ref
-
- ``parents`` : tuple( Commit, ... )
- is a tuple of commit ids or actual Commits
-
- ``tree`` : Tree
- is the corresponding tree id or an actual Tree
-
- ``author`` : Actor
- is the author string ( will be implicitly converted into an Actor object )
-
- ``authored_date`` : int_seconds_since_epoch
- is the authored DateTime - use time.gmtime() to convert it into a
- different format
-
- ``author_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
-
- ``committer`` : Actor
- is the committer string
-
- ``committed_date`` : int_seconds_since_epoch
- is the committed DateTime - use time.gmtime() to convert it into a
- different format
-
- ``committer_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
-
- ``message`` : string
- is the commit message
-
- Returns
- git.Commit
- """
- super(Commit,self).__init__(repo, sha)
- self._set_self_from_args_(locals())
-
- if parents is not None:
- self.parents = tuple( self.__class__(repo, p) for p in parents )
- # END for each parent to convert
-
- if self.sha and tree is not None:
- self.tree = Tree(repo, tree, path='')
- # END id to tree conversion
-
- @classmethod
- def _get_intermediate_items(cls, commit):
- return commit.parents
-
- def _set_cache_(self, attr):
- """
- Called by LazyMixin superclass when the given uninitialized member needs
- to be set.
- We set all values at once.
- """
- if attr in Commit.__slots__:
- # prepare our data lines to match rev-list
- data_lines = self.data.splitlines()
- data_lines.insert(0, "commit %s" % self.sha)
- temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
- self.parents = temp.parents
- self.tree = temp.tree
- self.author = temp.author
- self.authored_date = temp.authored_date
- self.author_tz_offset = temp.author_tz_offset
- self.committer = temp.committer
- self.committed_date = temp.committed_date
- self.committer_tz_offset = temp.committer_tz_offset
- self.message = temp.message
- else:
- super(Commit, self)._set_cache_(attr)
-
- @property
- def summary(self):
- """
- Returns
- First line of the commit message.
- """
- return self.message.split('\n', 1)[0]
-
- def count(self, paths='', **kwargs):
- """
- Count the number of commits reachable from this commit
-
- ``paths``
- is an optinal path or a list of paths restricting the return value
- to commits actually containing the paths
-
- ``kwargs``
- Additional options to be passed to git-rev-list. They must not alter
- the ouput style of the command, or parsing will yield incorrect results
- Returns
- int
- """
- # yes, it makes a difference whether empty paths are given or not in our case
- # as the empty paths version will ignore merge commits for some reason.
- if paths:
- return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
- else:
- return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
-
-
- @property
- def name_rev(self):
- """
- Returns
- String describing the commits hex sha based on the closest Reference.
- Mostly useful for UI purposes
- """
- return self.repo.git.name_rev(self)
-
- @classmethod
- def iter_items(cls, repo, rev, paths='', **kwargs):
- """
- Find all commits matching the given criteria.
-
- ``repo``
- is the Repo
-
- ``rev``
- revision specifier, see git-rev-parse for viable options
-
- ``paths``
- is an optinal path or list of paths, if set only Commits that include the path
- or paths will be considered
-
- ``kwargs``
- optional keyword arguments to git rev-list where
- ``max_count`` is the maximum number of commits to fetch
- ``skip`` is the number of commits to skip
- ``since`` all commits since i.e. '1970-01-01'
-
- Returns
- iterator yielding Commit items
- """
- options = {'pretty': 'raw', 'as_process' : True }
- options.update(kwargs)
-
- args = list()
- if paths:
- args.extend(('--', paths))
- # END if paths
-
- proc = repo.git.rev_list(rev, args, **options)
- return cls._iter_from_process_or_stream(repo, proc, True)
-
- def iter_parents(self, paths='', **kwargs):
- """
- Iterate _all_ parents of this commit.
-
- ``paths``
- Optional path or list of paths limiting the Commits to those that
- contain at least one of the paths
-
- ``kwargs``
- All arguments allowed by git-rev-list
-
- Return:
- Iterator yielding Commit objects which are parents of self
- """
- # skip ourselves
- skip = kwargs.get("skip", 1)
- if skip == 0: # skip ourselves
- skip = 1
- kwargs['skip'] = skip
-
- return self.iter_items( self.repo, self, paths, **kwargs )
-
- @property
- def stats(self):
- """
- Create a git stat from changes between this commit and its first parent
- or from all changes done if this is the very first commit.
-
- Return
- git.Stats
- """
- if not self.parents:
- text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
- text2 = ""
- for line in text.splitlines()[1:]:
- (insertions, deletions, filename) = line.split("\t")
- text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
- text = text2
- else:
- text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
- return stats.Stats._list_from_string(self.repo, text)
-
- @classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
- """
- Parse out commit information into a list of Commit objects
-
- ``repo``
- is the Repo
-
- ``proc``
- git-rev-list process instance (raw format)
-
- ``from_rev_list``
- If True, the stream was created by rev-list in which case we parse
- the message differently
- Returns
- iterator returning Commit objects
- """
- stream = proc_or_stream
- if not hasattr(stream,'next'):
- stream = proc_or_stream.stdout
-
- for line in stream:
- commit_tokens = line.split()
- id = commit_tokens[1]
- assert commit_tokens[0] == "commit"
- tree = stream.next().split()[1]
-
- parents = []
- next_line = None
- for parent_line in stream:
- if not parent_line.startswith('parent'):
- next_line = parent_line
- break
- # END abort reading parents
- parents.append(parent_line.split()[-1])
- # END for each parent line
-
- author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
- committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
-
- # empty line
- stream.next()
-
- message_lines = []
- if from_rev_list:
- for msg_line in stream:
- if not msg_line.startswith(' '):
- # and forget about this empty marker
- break
- # END abort message reading
- # strip leading 4 spaces
- message_lines.append(msg_line[4:])
- # END while there are message lines
- else:
- # a stream from our data simply gives us the plain message
- for msg_line in stream:
- message_lines.append(msg_line)
- # END message parsing
- message = '\n'.join(message_lines)
-
- yield Commit(repo, id, parents=tuple(parents), tree=tree,
- author=author, authored_date=authored_date, author_tz_offset=author_tz_offset,
- committer=committer, committed_date=committed_date, committer_tz_offset=committer_tz_offset,
- message=message)
- # END for each line in stream
-
-
- @classmethod
- def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
- """
- Commit the given tree, creating a commit object.
-
- ``repo``
- is the Repo
-
- ``tree``
- Sha of a tree or a tree object to become the tree of the new commit
-
- ``message``
- Commit message. It may be an empty string if no message is provided.
- It will be converted to a string in any case.
-
- ``parent_commits``
- Optional Commit objects to use as parents for the new commit.
- If empty list, the commit will have no parents at all and become
- a root commit.
- If None , the current head commit will be the parent of the
- new commit object
-
- ``head``
- If True, the HEAD will be advanced to the new commit automatically.
- Else the HEAD will remain pointing on the previous commit. This could
- lead to undesired results when diffing files.
-
- Returns
- Commit object representing the new commit
-
- Note:
- Additional information about hte committer and Author are taken from the
- environment or from the git configuration, see git-commit-tree for
- more information
- """
- parents = parent_commits
- if parent_commits is None:
- try:
- parent_commits = [ repo.head.commit ]
- except ValueError:
- # empty repositories have no head commit
- parent_commits = list()
- # END handle parent commits
- # END if parent commits are unset
-
- parent_args = [ ("-p", str(commit)) for commit in parent_commits ]
-
- # create message stream
- tmp_file_path = tempfile.mktemp()
- fp = open(tmp_file_path,"wb")
- fp.write(str(message))
- fp.close()
- fp = open(tmp_file_path,"rb")
- fp.seek(0)
-
- try:
- # write the current index as tree
- commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp)
- new_commit = cls(repo, commit_sha)
-
- if head:
- try:
- repo.head.commit = new_commit
- except ValueError:
- # head is not yet set to the ref our HEAD points to.
- import git.refs
- master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
- repo.head.reference = master
- # END handle empty repositories
- # END advance head handling
-
- return new_commit
- finally:
- fp.close()
- os.remove(tmp_file_path)
-
- def __str__(self):
- """ Convert commit to string which is SHA1 """
- return self.sha
-
- def __repr__(self):
- return '<git.Commit "%s">' % self.sha
+ """
+ Wraps a git Commit object.
+
+ This class will act lazily on some of its attributes and will query the
+ value on demand only if it involves calling the git binary.
+ """
+
+ # ENVIRONMENT VARIABLES
+ # read when creating new commits
+ env_author_name = "GIT_AUTHOR_NAME"
+ env_author_email = "GIT_AUTHOR_EMAIL"
+ env_author_date = "GIT_AUTHOR_DATE"
+ env_committer_name = "GIT_COMMITTER_NAME"
+ env_committer_email = "GIT_COMMITTER_EMAIL"
+ env_committer_date = "GIT_COMMITTER_DATE"
+ env_email = "EMAIL"
+
+ # CONFIGURATION KEYS
+ conf_email = 'email'
+ conf_name = 'name'
+ conf_encoding = 'i18n.commitencoding'
+
+ # INVARIANTS
+ default_encoding = "UTF-8"
+
+
+ # object configuration
+ type = "commit"
+ __slots__ = ("tree",
+ "author", "authored_date", "author_tz_offset",
+ "committer", "committed_date", "committer_tz_offset",
+ "message", "parents", "encoding")
+ _id_attribute_ = "sha"
+
+ def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ committer=None, committed_date=None, committer_tz_offset=None,
+ message=None, parents=None, encoding=None):
+ """
+ Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set if id names a valid sha.
+
+ The parameter documentation indicates the type of the argument after a colon ':'.
+
+ :param sha: is the sha id of the commit or a ref
+ :param parents: tuple( Commit, ... )
+ is a tuple of commit ids or actual Commits
+ :param tree: Tree
+ is the corresponding tree id or an actual Tree
+ :param author: Actor
+ is the author string ( will be implicitly converted into an Actor object )
+ :param authored_date: int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
+ :param author_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param committer: Actor
+ is the committer string
+ :param committed_date: int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
+ :param committer_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param message: string
+ is the commit message
+ :param encoding: string
+ encoding of the message, defaults to UTF-8
+ :return: git.Commit
+
+ :note: Timezone information is in the same format and in the same sign
+ as what time.altzone returns. The sign is inverted compared to git's
+ UTC timezone.
+ """
+ super(Commit,self).__init__(repo, sha)
+ self._set_self_from_args_(locals())
+
+ if parents is not None:
+ self.parents = tuple( self.__class__(repo, p) for p in parents )
+ # END for each parent to convert
+
+ if self.sha and tree is not None:
+ self.tree = Tree(repo, tree, path='')
+ # END id to tree conversion
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents
+
+ def _set_cache_(self, attr):
+ """
+ Called by LazyMixin superclass when the given uninitialized member needs
+ to be set.
+ We set all values at once.
+ """
+ if attr in Commit.__slots__:
+ # prepare our data lines to match rev-list
+ data_lines = self.data.splitlines()
+ data_lines.insert(0, "commit %s" % self.sha)
+ temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
+ self.parents = temp.parents
+ self.tree = temp.tree
+ self.author = temp.author
+ self.authored_date = temp.authored_date
+ self.author_tz_offset = temp.author_tz_offset
+ self.committer = temp.committer
+ self.committed_date = temp.committed_date
+ self.committer_tz_offset = temp.committer_tz_offset
+ self.message = temp.message
+ self.encoding = temp.encoding
+ else:
+ super(Commit, self)._set_cache_(attr)
+
+ @property
+ def summary(self):
+ """
+ Returns
+ First line of the commit message.
+ """
+ return self.message.split('\n', 1)[0]
+
+ def count(self, paths='', **kwargs):
+ """
+ Count the number of commits reachable from this commit
+
+ ``paths``
+ is an optional path or a list of paths restricting the return value
+ to commits actually containing the paths
+
+ ``kwargs``
+ Additional options to be passed to git-rev-list. They must not alter
+ the output style of the command, or parsing will yield incorrect results
+ Returns
+ int
+ """
+ # yes, it makes a difference whether empty paths are given or not in our case
+ # as the empty paths version will ignore merge commits for some reason.
+ if paths:
+ return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
+ else:
+ return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
+
+
+ @property
+ def name_rev(self):
+ """
+ Returns
+ String describing the commits hex sha based on the closest Reference.
+ Mostly useful for UI purposes
+ """
+ return self.repo.git.name_rev(self)
+
+ @classmethod
+ def iter_items(cls, repo, rev, paths='', **kwargs):
+ """
+ Find all commits matching the given criteria.
+
+ ``repo``
+ is the Repo
+
+ ``rev``
+ revision specifier, see git-rev-parse for viable options
+
+ ``paths``
+ is an optional path or list of paths, if set only Commits that include the path
+ or paths will be considered
+
+ ``kwargs``
+ optional keyword arguments to git rev-list where
+ ``max_count`` is the maximum number of commits to fetch
+ ``skip`` is the number of commits to skip
+ ``since`` all commits since i.e. '1970-01-01'
+
+ Returns
+ iterator yielding Commit items
+ """
+ options = {'pretty': 'raw', 'as_process' : True }
+ options.update(kwargs)
+
+ args = list()
+ if paths:
+ args.extend(('--', paths))
+ # END if paths
+
+ proc = repo.git.rev_list(rev, args, **options)
+ return cls._iter_from_process_or_stream(repo, proc, True)
+
+ def iter_parents(self, paths='', **kwargs):
+ """
+ Iterate _all_ parents of this commit.
+
+ ``paths``
+ Optional path or list of paths limiting the Commits to those that
+ contain at least one of the paths
+
+ ``kwargs``
+ All arguments allowed by git-rev-list
+
+ Return:
+ Iterator yielding Commit objects which are parents of self
+ """
+ # skip ourselves
+ skip = kwargs.get("skip", 1)
+ if skip == 0: # skip ourselves
+ skip = 1
+ kwargs['skip'] = skip
+
+ return self.iter_items( self.repo, self, paths, **kwargs )
+
+ @property
+ def stats(self):
+ """
+ Create a git stat from changes between this commit and its first parent
+ or from all changes done if this is the very first commit.
+
+ Return
+ git.Stats
+ """
+ if not self.parents:
+ text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
+ text2 = ""
+ for line in text.splitlines()[1:]:
+ (insertions, deletions, filename) = line.split("\t")
+ text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
+ text = text2
+ else:
+ text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
+ return stats.Stats._list_from_string(self.repo, text)
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
+ """
+ Parse out commit information into a list of Commit objects
+
+ ``repo``
+ is the Repo
+
+ ``proc``
+ git-rev-list process instance (raw format)
+
+ ``from_rev_list``
+ If True, the stream was created by rev-list in which case we parse
+ the message differently
+ Returns
+ iterator returning Commit objects
+ """
+ stream = proc_or_stream
+ if not hasattr(stream,'next'):
+ stream = proc_or_stream.stdout
+
+ for line in stream:
+ commit_tokens = line.split()
+ id = commit_tokens[1]
+ assert commit_tokens[0] == "commit"
+ tree = stream.next().split()[1]
+
+ parents = []
+ next_line = None
+ for parent_line in stream:
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ parents.append(parent_line.split()[-1])
+ # END for each parent line
+
+ author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
+ committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
+
+
+ # next line is either the optional 'encoding' header or the empty separator
+ encoding = stream.next()
+ encoding = encoding.strip()
+ if encoding:
+ encoding = encoding[encoding.find(' ')+1:]
+ # END parse encoding
+
+ message_lines = list()
+ if from_rev_list:
+ for msg_line in stream:
+ if not msg_line.startswith(' '):
+ # and forget about this empty marker
+ break
+ # END abort message reading
+ # strip leading 4 spaces
+ message_lines.append(msg_line[4:])
+ # END while there are message lines
+ else:
+ # a stream from our data simply gives us the plain message
+ for msg_line in stream:
+ message_lines.append(msg_line)
+ # END message parsing
+ message = '\n'.join(message_lines)
+
+
+ yield Commit(repo, id, tree,
+ author, authored_date, author_tz_offset,
+ committer, committed_date, committer_tz_offset,
+ message, tuple(parents),
+ encoding or cls.default_encoding)
+ # END for each line in stream
+
+
+ @classmethod
+ def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
+ """Commit the given tree, creating a commit object.
+
+ :param repo: Repo object the commit should be part of
+ :param tree: Sha of a tree or a tree object to become the tree of the new commit
+ :param message: Commit message. It may be an empty string if no message is provided.
+ It will be converted to a string in any case.
+ :param parent_commits:
+ Optional Commit objects to use as parents for the new commit.
+ If empty list, the commit will have no parents at all and become
+ a root commit.
+ If None , the current head commit will be the parent of the
+ new commit object
+ :param head:
+ If True, the HEAD will be advanced to the new commit automatically.
+ Else the HEAD will remain pointing on the previous commit. This could
+ lead to undesired results when diffing files.
+
+ :return: Commit object representing the new commit
+
+ :note:
+ Additional information about the committer and Author are taken from the
+ environment or from the git configuration, see git-commit-tree for
+ more information
+ """
+ parents = parent_commits
+ if parent_commits is None:
+ try:
+ parent_commits = [ repo.head.commit ]
+ except ValueError:
+ # empty repositories have no head commit
+ parent_commits = list()
+ # END handle parent commits
+ # END if parent commits are unset
+
+ # retrieve all additional information, create a commit object, and
+ # serialize it
+ # Generally:
+ # * Environment variables override configuration values
+ # * Sensible defaults are set according to the git documentation
+
+ # COMMITTER AND AUTHOR INFO
+ cr = repo.config_reader()
+ env = os.environ
+ default_email = utils.get_user_id()
+ default_name = default_email.split('@')[0]
+
+ conf_name = cr.get_value('user', cls.conf_name, default_name)
+ conf_email = cr.get_value('user', cls.conf_email, default_email)
+
+ author_name = env.get(cls.env_author_name, conf_name)
+ author_email = env.get(cls.env_author_email, conf_email)
+
+ committer_name = env.get(cls.env_committer_name, conf_name)
+ committer_email = env.get(cls.env_committer_email, conf_email)
+
+ # PARSE THE DATES
+ unix_time = int(time.time())
+ offset = time.altzone
+
+ author_date_str = env.get(cls.env_author_date, '')
+ if author_date_str:
+ author_time, author_offset = utils.parse_date(author_date_str)
+ else:
+ author_time, author_offset = unix_time, offset
+ # END set author time
+
+ committer_date_str = env.get(cls.env_committer_date, '')
+ if committer_date_str:
+ committer_time, committer_offset = utils.parse_date(committer_date_str)
+ else:
+ committer_time, committer_offset = unix_time, offset
+ # END set committer time
+
+ # assume utf8 encoding
+ enc_section, enc_option = cls.conf_encoding.split('.')
+ conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
+
+ author = Actor(author_name, author_email)
+ committer = Actor(committer_name, committer_email)
+
+
+ # CREATE NEW COMMIT
+ new_commit = cls(repo, cls.NULL_HEX_SHA, tree,
+ author, author_time, author_offset,
+ committer, committer_time, committer_offset,
+ message, parent_commits, conf_encoding)
+
+ # serialize !
+
+ if head:
+ try:
+ repo.head.commit = new_commit
+ except ValueError:
+ # head is not yet set to the ref our HEAD points to
+ # Happens on first commit
+ import git.refs
+ master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
+ repo.head.reference = master
+ # END handle empty repositories
+ # END advance head handling
+
+ return new_commit
+
+
+ def __str__(self):
+ """ Convert commit to string which is SHA1 """
+ return self.sha
+
+ def __repr__(self):
+ return '<git.Commit "%s">' % self.sha
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 4f17b652..7060e293 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -9,159 +9,274 @@ Module for general utility functions
import re
from collections import deque as Deque
from git.actor import Actor
+import platform
+
+from string import digits
+import time
+import os
+
+__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date',
+ 'ProcessStreamAdapter', 'Traversable')
def get_object_type_by_name(object_type_name):
- """
- Returns
- type suitable to handle the given object type name.
- Use the type to create new instances.
-
- ``object_type_name``
- Member of TYPES
-
- Raises
- ValueError: In case object_type_name is unknown
- """
- if object_type_name == "commit":
- import commit
- return commit.Commit
- elif object_type_name == "tag":
- import tag
- return tag.TagObject
- elif object_type_name == "blob":
- import blob
- return blob.Blob
- elif object_type_name == "tree":
- import tree
- return tree.Tree
- else:
- raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
-
-
+ """
+ Returns
+ type suitable to handle the given object type name.
+ Use the type to create new instances.
+
+ ``object_type_name``
+ Member of TYPES
+
+ Raises
+ ValueError: In case object_type_name is unknown
+ """
+ if object_type_name == "commit":
+ import commit
+ return commit.Commit
+ elif object_type_name == "tag":
+ import tag
+ return tag.TagObject
+ elif object_type_name == "blob":
+ import blob
+ return blob.Blob
+ elif object_type_name == "tree":
+ import tree
+ return tree.Tree
+ else:
+ raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
+
+
+def get_user_id():
+ """:return: string identifying the currently active system user as name@node
+ :note: user can be set with the 'USER' environment variable, usually set on windows"""
+ ukn = 'UNKNOWN'
+ username = os.environ.get('USER', ukn)
+ if username == ukn and hasattr(os, 'getlogin'):
+ username = os.getlogin()
+ # END get username from login
+ return "%s@%s" % (username, platform.node())
+
+
+def _utc_tz_to_altz(utctz):
+ """we convert utctz to the timezone in seconds, it is the format time.altzone
+ returns. Git stores it as UTC timezone which has the opposite sign as well,
+ which explains the -1 * ( that was made explicit here )
+ :param utctz: git utc timezone string, i.e. +0200"""
+ return -1 * int(float(utctz)/100*3600)
+
+def _verify_utctz(offset):
+ """:raise ValueError: if offset is incorrect
+ :return: offset"""
+ fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
+ if len(offset) != 5:
+ raise fmt_exc
+ if offset[0] not in "+-":
+ raise fmt_exc
+ if offset[1] not in digits or \
+ offset[2] not in digits or \
+ offset[3] not in digits or \
+ offset[4] not in digits:
+ raise fmt_exc
+ # END for each char
+ return offset
+
+def parse_date(string_date):
+ """
+ Parse the given date as one of the following
+ * Git internal format: timestamp offset
+ * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200.
+ * ISO 8601 2005-04-07T22:13:13
+ The T can be a space as well
+
+ :return: Tuple(int(timestamp), int(offset), both in seconds since epoch
+ :raise ValueError: If the format could not be understood
+ :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY
+ """
+ # git time
+ try:
+ if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
+ timestamp, offset = string_date.split()
+ timestamp = int(timestamp)
+ return timestamp, _utc_tz_to_altz(_verify_utctz(offset))
+ else:
+ offset = "+0000" # local time by default
+ if string_date[-5] in '-+':
+ offset = _verify_utctz(string_date[-5:])
+ string_date = string_date[:-6] # skip space as well
+ # END split timezone info
+
+ # now figure out the date and time portion - split time
+ date_formats = list()
+ splitter = -1
+ if ',' in string_date:
+ date_formats.append("%a, %d %b %Y")
+ splitter = string_date.rfind(' ')
+ else:
+ # iso plus additional
+ date_formats.append("%Y-%m-%d")
+ date_formats.append("%Y.%m.%d")
+ date_formats.append("%m/%d/%Y")
+ date_formats.append("%d.%m.%Y")
+
+ splitter = string_date.rfind('T')
+ if splitter == -1:
+ splitter = string_date.rfind(' ')
+ # END handle 'T' and ' '
+ # END handle rfc or iso
+
+ assert splitter > -1
+
+ # split date and time
+ time_part = string_date[splitter+1:] # skip space
+ date_part = string_date[:splitter]
+
+ # parse time
+ tstruct = time.strptime(time_part, "%H:%M:%S")
+
+ for fmt in date_formats:
+ try:
+ dtstruct = time.strptime(date_part, fmt)
+ fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
+ tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
+ dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
+ return int(time.mktime(fstruct)), _utc_tz_to_altz(offset)
+ except ValueError:
+ continue
+ # END exception handling
+ # END for each fmt
+
+ # still here ? fail
+ raise ValueError("no format matched")
+ # END handle format
+ except Exception:
+ raise ValueError("Unsupported date format: %s" % string_date)
+ # END handle exceptions
+
+
# precompiled regex
_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
def parse_actor_and_date(line):
- """
- Parse out the actor (author or committer) info from a line like::
-
- author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-
- Returns
- [Actor, int_seconds_since_epoch, int_timezone_offset]
- """
- m = _re_actor_epoch.search(line)
- actor, epoch, offset = m.groups()
- return (Actor._from_string(actor), int(epoch), -int(float(offset)/100*3600))
-
-
-
+ """
+ Parse out the actor (author or committer) info from a line like::
+
+ author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+
+ Returns
+ [Actor, int_seconds_since_epoch, int_timezone_offset]
+ """
+ m = _re_actor_epoch.search(line)
+ actor, epoch, offset = m.groups()
+ return (Actor._from_string(actor), int(epoch), _utc_tz_to_altz(offset))
+
+
+
class ProcessStreamAdapter(object):
- """
- Class wireing all calls to the contained Process instance.
-
- Use this type to hide the underlying process to provide access only to a specified
- stream. The process is usually wrapped into an AutoInterrupt class to kill
- it if the instance goes out of scope.
- """
- __slots__ = ("_proc", "_stream")
- def __init__(self, process, stream_name):
- self._proc = process
- self._stream = getattr(process, stream_name)
-
- def __getattr__(self, attr):
- return getattr(self._stream, attr)
-
-
+ """
+ Class wiring all calls to the contained Process instance.
+
+ Use this type to hide the underlying process to provide access only to a specified
+ stream. The process is usually wrapped into an AutoInterrupt class to kill
+ it if the instance goes out of scope.
+ """
+ __slots__ = ("_proc", "_stream")
+ def __init__(self, process, stream_name):
+ self._proc = process
+ self._stream = getattr(process, stream_name)
+
+ def __getattr__(self, attr):
+ return getattr(self._stream, attr)
+
+
class Traversable(object):
- """Simple interface to perforam depth-first or breadth-first traversals
- into one direction.
- Subclasses only need to implement one function.
- Instances of the Subclass must be hashable"""
- __slots__ = tuple()
-
- @classmethod
- def _get_intermediate_items(cls, item):
- """
- Returns:
- List of items connected to the given item.
- Must be implemented in subclass
- """
- raise NotImplementedError("To be implemented in subclass")
-
-
- def traverse( self, predicate = lambda i,d: True,
- prune = lambda i,d: False, depth = -1, branch_first=True,
- visit_once = True, ignore_self=1, as_edge = False ):
- """
- ``Returns``
- iterator yieling of items found when traversing self
-
- ``predicate``
- f(i,d) returns False if item i at depth d should not be included in the result
-
- ``prune``
- f(i,d) return True if the search should stop at item i at depth d.
- Item i will not be returned.
-
- ``depth``
- define at which level the iteration should not go deeper
- if -1, there is no limit
- if 0, you would effectively only get self, the root of the iteration
- i.e. if 1, you would only get the first level of predessessors/successors
-
- ``branch_first``
- if True, items will be returned branch first, otherwise depth first
-
- ``visit_once``
- if True, items will only be returned once, although they might be encountered
- several times. Loops are prevented that way.
-
- ``ignore_self``
- if True, self will be ignored and automatically pruned from
- the result. Otherwise it will be the first item to be returned.
- If as_edge is True, the source of the first edge is None
-
- ``as_edge``
- if True, return a pair of items, first being the source, second the
- destinatination, i.e. tuple(src, dest) with the edge spanning from
- source to destination"""
- visited = set()
- stack = Deque()
- stack.append( ( 0 ,self, None ) ) # self is always depth level 0
-
- def addToStack( stack, item, branch_first, depth ):
- lst = self._get_intermediate_items( item )
- if not lst:
- return
- if branch_first:
- stack.extendleft( ( depth , i, item ) for i in lst )
- else:
- reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
- stack.extend( reviter )
- # END addToStack local method
-
- while stack:
- d, item, src = stack.pop() # depth of item, item, item_source
-
- if visit_once and item in visited:
- continue
-
- if visit_once:
- visited.add(item)
-
- rval = ( as_edge and (src, item) ) or item
- if prune( rval, d ):
- continue
-
- skipStartItem = ignore_self and ( item == self )
- if not skipStartItem and predicate( rval, d ):
- yield rval
-
- # only continue to next level if this is appropriate !
- nd = d + 1
- if depth > -1 and nd > depth:
- continue
-
- addToStack( stack, item, branch_first, nd )
- # END for each item on work stack
+ """Simple interface to perform depth-first or breadth-first traversals
+ into one direction.
+ Subclasses only need to implement one function.
+ Instances of the Subclass must be hashable"""
+ __slots__ = tuple()
+
+ @classmethod
+ def _get_intermediate_items(cls, item):
+ """
+ Returns:
+ List of items connected to the given item.
+ Must be implemented in subclass
+ """
+ raise NotImplementedError("To be implemented in subclass")
+
+
+ def traverse( self, predicate = lambda i,d: True,
+ prune = lambda i,d: False, depth = -1, branch_first=True,
+ visit_once = True, ignore_self=1, as_edge = False ):
+ """
+ ``Returns``
+ iterator yielding items found when traversing self
+
+ ``predicate``
+ f(i,d) returns False if item i at depth d should not be included in the result
+
+ ``prune``
+ f(i,d) returns True if the search should stop at item i at depth d.
+ Item i will not be returned.
+
+ ``depth``
+ define at which level the iteration should not go deeper
+ if -1, there is no limit
+ if 0, you would effectively only get self, the root of the iteration
+ i.e. if 1, you would only get the first level of predecessors/successors
+
+ ``branch_first``
+ if True, items will be returned branch first, otherwise depth first
+
+ ``visit_once``
+ if True, items will only be returned once, although they might be encountered
+ several times. Loops are prevented that way.
+
+ ``ignore_self``
+ if True, self will be ignored and automatically pruned from
+ the result. Otherwise it will be the first item to be returned.
+ If as_edge is True, the source of the first edge is None
+
+ ``as_edge``
+ if True, return a pair of items, first being the source, second the
+ destination, i.e. tuple(src, dest) with the edge spanning from
+ source to destination"""
+ visited = set()
+ stack = Deque()
+ stack.append( ( 0 ,self, None ) ) # self is always depth level 0
+
+ def addToStack( stack, item, branch_first, depth ):
+ lst = self._get_intermediate_items( item )
+ if not lst:
+ return
+ if branch_first:
+ stack.extendleft( ( depth , i, item ) for i in lst )
+ else:
+ reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
+ stack.extend( reviter )
+ # END addToStack local method
+
+ while stack:
+ d, item, src = stack.pop() # depth of item, item, item_source
+
+ if visit_once and item in visited:
+ continue
+
+ if visit_once:
+ visited.add(item)
+
+ rval = ( as_edge and (src, item) ) or item
+ if prune( rval, d ):
+ continue
+
+ skipStartItem = ignore_self and ( item == self )
+ if not skipStartItem and predicate( rval, d ):
+ yield rval
+
+ # only continue to next level if this is appropriate !
+ nd = d + 1
+ if depth > -1 and nd > depth:
+ continue
+
+ addToStack( stack, item, branch_first, nd )
+ # END for each item on work stack
diff --git a/test/git/test_utils.py b/test/git/test_utils.py
index f843c12e..2c3c392b 100644
--- a/test/git/test_utils.py
+++ b/test/git/test_utils.py
@@ -9,112 +9,141 @@ import tempfile
from test.testlib import *
from git.utils import *
+from git.objects.utils import *
from git import *
from git.cmd import dashify
import time
class TestUtils(TestCase):
- def setup(self):
- self.testdict = {
- "string": "42",
- "int": 42,
- "array": [ 42 ],
- }
+ def setup(self):
+ self.testdict = {
+ "string": "42",
+ "int": 42,
+ "array": [ 42 ],
+ }
- def test_it_should_dashify(self):
- assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
- assert_equal('foo', dashify('foo'))
-
-
- def test_lock_file(self):
- my_file = tempfile.mktemp()
- lock_file = LockFile(my_file)
- assert not lock_file._has_lock()
- # release lock we don't have - fine
- lock_file._release_lock()
-
- # get lock
- lock_file._obtain_lock_or_raise()
- assert lock_file._has_lock()
-
- # concurrent access
- other_lock_file = LockFile(my_file)
- assert not other_lock_file._has_lock()
- self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise)
-
- lock_file._release_lock()
- assert not lock_file._has_lock()
-
- other_lock_file._obtain_lock_or_raise()
- self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise)
-
- # auto-release on destruction
- del(other_lock_file)
- lock_file._obtain_lock_or_raise()
- lock_file._release_lock()
-
- def test_blocking_lock_file(self):
- my_file = tempfile.mktemp()
- lock_file = BlockingLockFile(my_file)
- lock_file._obtain_lock()
-
- # next one waits for the lock
- start = time.time()
- wait_time = 0.1
- wait_lock = BlockingLockFile(my_file, 0.05, wait_time)
- self.failUnlessRaises(IOError, wait_lock._obtain_lock)
- elapsed = time.time() - start
- assert elapsed <= wait_time + 0.02 # some extra time it may cost
-
- def _cmp_contents(self, file_path, data):
- # raise if data from file at file_path
- # does not match data string
- fp = open(file_path, "rb")
- try:
- assert fp.read() == data
- finally:
- fp.close()
-
- def test_safe_operation(self):
- my_file = tempfile.mktemp()
- orig_data = "hello"
- new_data = "world"
- my_file_fp = open(my_file, "wb")
- my_file_fp.write(orig_data)
- my_file_fp.close()
-
- try:
- cwrite = ConcurrentWriteOperation(my_file)
-
- # didn't start writing, doesnt matter
- cwrite._end_writing(False)
- cwrite._end_writing(True)
- assert not cwrite._is_writing()
-
- # write data and fail
- stream = cwrite._begin_writing()
- assert cwrite._is_writing()
- stream.write(new_data)
- cwrite._end_writing(successful=False)
- self._cmp_contents(my_file, orig_data)
- assert not os.path.exists(stream.name)
-
- # write data - concurrently
- ocwrite = ConcurrentWriteOperation(my_file)
- stream = cwrite._begin_writing()
- self.failUnlessRaises(IOError, ocwrite._begin_writing)
-
- stream.write("world")
- cwrite._end_writing(successful=True)
- self._cmp_contents(my_file, new_data)
- assert not os.path.exists(stream.name)
-
- # could test automatic _end_writing on destruction
- finally:
- os.remove(my_file)
- # END final cleanup
-
-
-
-
+ def test_it_should_dashify(self):
+ assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
+ assert_equal('foo', dashify('foo'))
+
+
+ def test_lock_file(self):
+ my_file = tempfile.mktemp()
+ lock_file = LockFile(my_file)
+ assert not lock_file._has_lock()
+ # release lock we don't have - fine
+ lock_file._release_lock()
+
+ # get lock
+ lock_file._obtain_lock_or_raise()
+ assert lock_file._has_lock()
+
+ # concurrent access
+ other_lock_file = LockFile(my_file)
+ assert not other_lock_file._has_lock()
+ self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise)
+
+ lock_file._release_lock()
+ assert not lock_file._has_lock()
+
+ other_lock_file._obtain_lock_or_raise()
+ self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise)
+
+ # auto-release on destruction
+ del(other_lock_file)
+ lock_file._obtain_lock_or_raise()
+ lock_file._release_lock()
+
+ def test_blocking_lock_file(self):
+ my_file = tempfile.mktemp()
+ lock_file = BlockingLockFile(my_file)
+ lock_file._obtain_lock()
+
+ # next one waits for the lock
+ start = time.time()
+ wait_time = 0.1
+ wait_lock = BlockingLockFile(my_file, 0.05, wait_time)
+ self.failUnlessRaises(IOError, wait_lock._obtain_lock)
+ elapsed = time.time() - start
+ assert elapsed <= wait_time + 0.02 # some extra time it may cost
+
+ def _cmp_contents(self, file_path, data):
+ # raise if data from file at file_path
+ # does not match data string
+ fp = open(file_path, "rb")
+ try:
+ assert fp.read() == data
+ finally:
+ fp.close()
+
+ def test_safe_operation(self):
+ my_file = tempfile.mktemp()
+ orig_data = "hello"
+ new_data = "world"
+ my_file_fp = open(my_file, "wb")
+ my_file_fp.write(orig_data)
+ my_file_fp.close()
+
+ try:
+ cwrite = ConcurrentWriteOperation(my_file)
+
+ # didn't start writing, doesnt matter
+ cwrite._end_writing(False)
+ cwrite._end_writing(True)
+ assert not cwrite._is_writing()
+
+ # write data and fail
+ stream = cwrite._begin_writing()
+ assert cwrite._is_writing()
+ stream.write(new_data)
+ cwrite._end_writing(successful=False)
+ self._cmp_contents(my_file, orig_data)
+ assert not os.path.exists(stream.name)
+
+ # write data - concurrently
+ ocwrite = ConcurrentWriteOperation(my_file)
+ stream = cwrite._begin_writing()
+ self.failUnlessRaises(IOError, ocwrite._begin_writing)
+
+ stream.write("world")
+ cwrite._end_writing(successful=True)
+ self._cmp_contents(my_file, new_data)
+ assert not os.path.exists(stream.name)
+
+ # could test automatic _end_writing on destruction
+ finally:
+ os.remove(my_file)
+ # END final cleanup
+
+
+
+ def test_user_id(self):
+ assert '@' in get_user_id()
+
+ def test_parse_date(self):
+ # test all supported formats
+ def assert_rval(rval, veri_time, offset=0):
+ assert len(rval) == 2
+ assert isinstance(rval[0], int) and isinstance(rval[1], int)
+ assert rval[0] == veri_time
+ assert rval[1] == offset
+ # END assert rval utility
+
+ rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0)
+ iso = ("2005-04-07T22:13:11 -0200", 7200)
+ iso2 = ("2005-04-07 22:13:11 +0400", -14400)
+ iso3 = ("2005.04.07 22:13:11 -0000", 0)
+ alt = ("04/07/2005 22:13:11", 0)
+ alt2 = ("07.04.2005 22:13:11", 0)
+ veri_time = 1112904791 # the time this represents
+ for date, offset in (rfc, iso, iso2, iso3, alt, alt2):
+ assert_rval(parse_date(date), veri_time, offset)
+ # END for each date type
+
+ # and failure
+ self.failUnlessRaises(ValueError, parse_date, 'invalid format')
+ self.failUnlessRaises(ValueError, parse_date, '123456789 -02000')
+ self.failUnlessRaises(ValueError, parse_date, ' 123456789 -0200')
+
+