summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Driessen <me@nvie.com>2016-04-13 14:52:18 +0200
committerVincent Driessen <me@nvie.com>2016-04-13 16:44:17 +0200
commite5b8220a1a967abdf2bae2124e3e22a9eea3729f (patch)
tree4d10bd3b123dd8321d6024665e323c00fa86b011
parent9debf6b0aafb6f7781ea9d1383c86939a1aacde3 (diff)
downloadgitpython-e5b8220a1a967abdf2bae2124e3e22a9eea3729f.tar.gz
Add incremental blame support
This adds a sibling method to Repo's blame method: Repo.blame_incremental(rev, path, **kwargs). It can alternatively be called using Repo.blame(rev, path, incremental=True). The main difference is that incremental blame is a bit more efficient and does not return the full file's contents, just the commits and the line number ranges. The parser is also more straightforward and faster, since the incremental output format is defined more strictly.
-rw-r--r--git/compat.py15
-rw-r--r--git/repo/base.py66
-rw-r--r--git/test/fixtures/blame_incremental30
-rw-r--r--git/test/test_repo.py24
4 files changed, 133 insertions, 2 deletions
diff --git a/git/compat.py b/git/compat.py
index 1ea2119e..146bfd4b 100644
--- a/git/compat.py
+++ b/git/compat.py
@@ -8,6 +8,7 @@
# flake8: noqa
import sys
+import six
from gitdb.utils.compat import (
PY3,
@@ -46,6 +47,20 @@ else:
def mviter(d):
return d.itervalues()
+PRE_PY27 = sys.version_info < (2, 7)
+
+
+def safe_decode(s):
+ """Safely decodes a binary string to unicode"""
+ if isinstance(s, six.text_type):
+ return s
+ elif isinstance(s, six.binary_type):
+ if PRE_PY27:
+ return s.decode(defenc) # we're screwed
+ else:
+ return s.decode(defenc, errors='replace')
+ raise TypeError('Expected bytes or text, but got %r' % (s,))
+
def with_metaclass(meta, *bases):
"""copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15"""
diff --git a/git/repo/base.py b/git/repo/base.py
index a23e767a..64374f80 100644
--- a/git/repo/base.py
+++ b/git/repo/base.py
@@ -52,12 +52,14 @@ from .fun import (
from git.compat import (
text_type,
defenc,
- PY3
+ PY3,
+ safe_decode,
)
import os
import sys
import re
+from six.moves import range
DefaultDBType = GitCmdObjectDB
if sys.version_info[:2] < (2, 5): # python 2.4 compatiblity
@@ -655,7 +657,64 @@ class Repo(object):
:return: Head to the active branch"""
return self.head.reference
- def blame(self, rev, file):
+ def blame_incremental(self, rev, file, **kwargs):
+ """Iterator for blame information for the given file at the given revision.
+
+ Unlike .blame(), this does not return the actual file's contents, only
+ a stream of (commit, range) tuples.
+
+ :param rev: revision specifier, see git-rev-parse for viable options.
+ :return: lazy iterator of (git.Commit, range) tuples, where the commit
+ indicates the commit to blame for the line, and range
+ indicates a span of line numbers in the resulting file.
+
+ If you combine all line number ranges outputted by this command, you
+ should get a continuous range spanning all line numbers in the file.
+ """
+ data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
+ commits = dict()
+
+ stream = iter(data.splitlines())
+ while True:
+ line = next(stream) # when exhausted, causes a StopIteration, terminating this function
+
+ hexsha, _, lineno, num_lines = line.split()
+ lineno = int(lineno)
+ num_lines = int(num_lines)
+ if hexsha not in commits:
+ # Now read the next few lines and build up a dict of properties
+ # for this commit
+ props = dict()
+ while True:
+ line = next(stream)
+ if line == b'boundary':
+ # "boundary" indicates a root commit and occurs
+ # instead of the "previous" tag
+ continue
+
+ tag, value = line.split(b' ', 1)
+ props[tag] = value
+ if tag == b'filename':
+ # "filename" formally terminates the entry for --incremental
+ break
+
+ c = Commit(self, hex_to_bin(hexsha),
+ author=Actor(safe_decode(props[b'author']),
+ safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
+ authored_date=int(props[b'author-time']),
+ committer=Actor(safe_decode(props[b'committer']),
+ safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
+ committed_date=int(props[b'committer-time']),
+ message=safe_decode(props[b'summary']))
+ commits[hexsha] = c
+ else:
+ # Discard the next line (it's a filename end tag)
+ line = next(stream)
+ assert line.startswith(b'filename'), 'Unexpected git blame output'
+
+ yield commits[hexsha], range(lineno, lineno + num_lines)
+
+ def blame(self, rev, file, incremental=False):
"""The blame information for the given file at the given revision.
:parm rev: revision specifier, see git-rev-parse for viable options.
@@ -664,6 +723,9 @@ class Repo(object):
A list of tuples associating a Commit object with a list of lines that
changed within the given commit. The Commit objects will be given in order
of appearance."""
+ if incremental:
+ return self.blame_incremental(rev, file)
+
data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
commits = dict()
blames = list()
diff --git a/git/test/fixtures/blame_incremental b/git/test/fixtures/blame_incremental
new file mode 100644
index 00000000..9a0d9e35
--- /dev/null
+++ b/git/test/fixtures/blame_incremental
@@ -0,0 +1,30 @@
+82b8902e033430000481eb355733cd7065342037 2 2 1
+author Sebastian Thiel
+author-mail <byronimo@gmail.com>
+author-time 1270634931
+author-tz +0200
+committer Sebastian Thiel
+committer-mail <byronimo@gmail.com>
+committer-time 1270634931
+committer-tz +0200
+summary Used this release for a first beta of the 0.2 branch of development
+previous 501bf602abea7d21c3dbb409b435976e92033145 AUTHORS
+filename AUTHORS
+82b8902e033430000481eb355733cd7065342037 14 14 1
+filename AUTHORS
+c76852d0bff115720af3f27acdb084c59361e5f6 1 1 1
+author Michael Trier
+author-mail <mtrier@gmail.com>
+author-time 1232829627
+author-tz -0500
+committer Michael Trier
+committer-mail <mtrier@gmail.com>
+committer-time 1232829627
+committer-tz -0500
+summary Lots of spring cleaning and added in Sphinx documentation.
+previous bcd57e349c08bd7f076f8d6d2f39b702015358c1 AUTHORS
+filename AUTHORS
+c76852d0bff115720af3f27acdb084c59361e5f6 2 3 11
+filename AUTHORS
+c76852d0bff115720af3f27acdb084c59361e5f6 13 15 2
+filename AUTHORS
diff --git a/git/test/test_repo.py b/git/test/test_repo.py
index 177aa176..ab6c502f 100644
--- a/git/test/test_repo.py
+++ b/git/test/test_repo.py
@@ -50,6 +50,16 @@ from io import BytesIO
from nose import SkipTest
+def iter_flatten(lol):
+ for items in lol:
+ for item in items:
+ yield item
+
+
+def flatten(lol):
+ return list(iter_flatten(lol))
+
+
class TestRepo(TestBase):
@raises(InvalidGitRepositoryError)
@@ -324,6 +334,20 @@ class TestRepo(TestBase):
assert nml, "There should at least be one blame commit that contains multiple lines"
@patch.object(Git, '_call_process')
+ def test_blame_incremental(self, git):
+ git.return_value = fixture('blame_incremental')
+ blame_output = self.rorepo.blame_incremental('9debf6b0aafb6f7781ea9d1383c86939a1aacde3', 'AUTHORS')
+ blame_output = list(blame_output)
+ assert len(blame_output) == 5
+
+ # Check all outputted line numbers
+ ranges = flatten([line_numbers for _, line_numbers in blame_output])
+ assert ranges == flatten([range(2, 3), range(14, 15), range(1, 2), range(3, 14), range(15, 17)]), str(ranges)
+
+ commits = [c.hexsha[:7] for c, _ in blame_output]
+ assert commits == ['82b8902', '82b8902', 'c76852d', 'c76852d', 'c76852d'], str(commits)
+
+ @patch.object(Git, '_call_process')
def test_blame_complex_revision(self, git):
git.return_value = fixture('blame_complex_revision')
res = self.rorepo.blame("HEAD~10..HEAD", "README.md")