summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2015-01-09 12:49:03 +0100
committer Sebastian Thiel <byronimo@gmail.com> 2015-01-09 12:49:03 +0100
commit 17f5d13a7a741dcbb2a30e147bdafe929cff4697 (patch)
tree 7794ddd8ace09c62627bb1639656f410267718b7
parent 1531d789df97dbf1ed3f5b0340bbf39918d9fe48 (diff)
download gitpython-17f5d13a7a741dcbb2a30e147bdafe929cff4697.tar.gz
Added test to ensure blame can deal with binary patches.
Fixes #74
-rw-r--r-- git/cmd.py 10
-rw-r--r-- git/repo/base.py 36
-rw-r--r-- git/test/fixtures/blame_binary bin 0 -> 14807 bytes
-rw-r--r-- git/test/test_repo.py 13
4 files changed, 44 insertions, 15 deletions
diff --git a/git/cmd.py b/git/cmd.py
index d0190adf..668d8f4a 100644
--- a/git/cmd.py
+++ b/git/cmd.py
@@ -31,7 +31,7 @@ from git.compat import (
)
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
- 'with_exceptions', 'as_process',
+ 'with_exceptions', 'as_process', 'stdout_as_string',
'output_stream')
log = logging.getLogger('git.cmd')
@@ -411,6 +411,7 @@ class Git(LazyMixin):
with_exceptions=True,
as_process=False,
output_stream=None,
+ stdout_as_string=True,
**subprocess_kwargs
):
"""Handles executing the command on the shell and consumes and returns
@@ -454,6 +455,11 @@ class Git(LazyMixin):
output pipe to the given output stream directly.
Judging from the implementation, you shouldn't use this flag !
+ :param stdout_as_string:
+ if False, the command's standard output will be bytes. Otherwise, it will be
+ decoded into a string using the default encoding (usually utf-8).
+ The latter can fail, if the output contains binary data.
+
:param subprocess_kwargs:
Keyword arguments to be passed to subprocess.Popen. Please note that
some of the valid kwargs are already set by this method, the ones you
@@ -545,7 +551,7 @@ class Git(LazyMixin):
else:
raise GitCommandError(command, status, stderr_value)
- if isinstance(stdout_value, bytes): # could also be output_stream
+ if isinstance(stdout_value, bytes) and stdout_as_string: # could also be output_stream
stdout_value = stdout_value.decode(defenc)
# Allow access to the command's status code
diff --git a/git/repo/base.py b/git/repo/base.py
index a84f617d..dbca4697 100644
--- a/git/repo/base.py
+++ b/git/repo/base.py
@@ -587,14 +587,28 @@ class Repo(object):
A list of tuples associating a Commit object with a list of lines that
changed within the given commit. The Commit objects will be given in order
of appearance."""
- data = self.git.blame(rev, '--', file, p=True)
+ data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
commits = dict()
blames = list()
info = None
- for line in data.splitlines(False):
- parts = self.re_whitespace.split(line, 1)
- firstpart = parts[0]
+ keepends = True
+ for line in data.splitlines(keepends):
+ try:
+ line = line.rstrip().decode(defenc)
+ except UnicodeDecodeError:
+ firstpart = ''
+ is_binary = True
+ else:
+ # We have no way of knowing where the binary data ends, as it could contain multiple newlines
+ # in the process. So we rely on being able to decode the line to tell us what it is.
+ # This can absolutely fail even on text files, but even if it does, we should be fine treating it
+ # as binary instead.
+ parts = self.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ is_binary = False
+ # end handle decode of line
+
if self.re_hexsha_only.search(firstpart):
# handles
# 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
@@ -651,10 +665,18 @@ class Repo(object):
message=info['summary'])
commits[sha] = c
# END if commit objects needs initial creation
- m = self.re_tab_full_line.search(line)
- text, = m.groups()
+ if not is_binary:
+ if line and line[0] == '\t':
+ line = line[1:]
+ else:
+ # NOTE: We are actually parsing lines out of binary data, which can lead to the
+ # binary being split up along the newline separator. We will append this to the blame
+ # we are currently looking at, even though it should be concatenated with the last line
+ # we have seen.
+ pass
+ # end handle line contents
blames[-1][0] = c
- blames[-1][1].append(text)
+ blames[-1][1].append(line)
info = {'id': sha}
# END if we collected commit info
# END distinguish filename,summary,rest
diff --git a/git/test/fixtures/blame_binary b/git/test/fixtures/blame_binary
new file mode 100644
index 00000000..db78205b
--- /dev/null
+++ b/git/test/fixtures/blame_binary
Binary files differ
diff --git a/git/test/test_repo.py b/git/test/test_repo.py
index dddfcd90..e2342b3c 100644
--- a/git/test/test_repo.py
+++ b/git/test/test_repo.py
@@ -33,10 +33,7 @@ from git.repo.fun import touch
from git.util import join_path_native
from git.exc import BadObject
from gitdb.util import bin_to_hex
-from git.compat import (
- string_types,
- defenc
-)
+from git.compat import string_types
from gitdb.test.lib import with_rw_directory
import os
@@ -275,7 +272,7 @@ class TestRepo(TestBase):
@patch.object(Git, '_call_process')
def test_should_display_blame_information(self, git):
- git.return_value = fixture('blame').decode(defenc)
+ git.return_value = fixture('blame')
b = self.rorepo.blame('master', 'lib/git.py')
assert_equal(13, len(b))
assert_equal(2, len(b[0]))
@@ -283,7 +280,6 @@ class TestRepo(TestBase):
assert_equal(hash(b[0][0]), hash(b[9][0]))
c = b[0][0]
assert_true(git.called)
- assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True}))
assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.hexsha)
assert_equal('Tom Preston-Werner', c.author.name)
@@ -300,6 +296,11 @@ class TestRepo(TestBase):
assert_true(isinstance(tlist[0], string_types))
assert_true(len(tlist) < sum(len(t) for t in tlist)) # test for single-char bug
+ # BINARY BLAME
+ git.return_value = fixture('blame_binary')
+ blames = self.rorepo.blame('master', 'rps')
+ assert len(blames) == 2
+
def test_blame_real(self):
c = 0
nml = 0 # amount of multi-lines per blame