diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-09 12:49:03 +0100 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-09 12:49:03 +0100 |
commit | 17f5d13a7a741dcbb2a30e147bdafe929cff4697 (patch) | |
tree | 7794ddd8ace09c62627bb1639656f410267718b7 | |
parent | 1531d789df97dbf1ed3f5b0340bbf39918d9fe48 (diff) | |
download | gitpython-17f5d13a7a741dcbb2a30e147bdafe929cff4697.tar.gz |
Added test to ensure blame can deal with binary patches.
Fixes #74
-rw-r--r-- | git/cmd.py | 10 | ||||
-rw-r--r-- | git/repo/base.py | 36 | ||||
-rw-r--r-- | git/test/fixtures/blame_binary | bin | 0 -> 14807 bytes | |||
-rw-r--r-- | git/test/test_repo.py | 13 |
4 files changed, 44 insertions, 15 deletions
@@ -31,7 +31,7 @@ from git.compat import ( ) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', + 'with_exceptions', 'as_process', 'stdout_as_string', 'output_stream') log = logging.getLogger('git.cmd') @@ -411,6 +411,7 @@ class Git(LazyMixin): with_exceptions=True, as_process=False, output_stream=None, + stdout_as_string=True, **subprocess_kwargs ): """Handles executing the command on the shell and consumes and returns @@ -454,6 +455,11 @@ class Git(LazyMixin): output pipe to the given output stream directly. Judging from the implementation, you shouldn't use this flag ! + :param stdout_as_string: + if False, the commands standard output will be bytes. Otherwise, it will be + decoded into a string using the default encoding (usually utf-8). + The latter can fail, if the output contains binary data. + :param subprocess_kwargs: Keyword arguments to be passed to subprocess.Popen. Please note that some of the valid kwargs are already set by this method, the ones you @@ -545,7 +551,7 @@ class Git(LazyMixin): else: raise GitCommandError(command, status, stderr_value) - if isinstance(stdout_value, bytes): # could also be output_stream + if isinstance(stdout_value, bytes) and stdout_as_string: # could also be output_stream stdout_value = stdout_value.decode(defenc) # Allow access to the command's status code diff --git a/git/repo/base.py b/git/repo/base.py index a84f617d..dbca4697 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -587,14 +587,28 @@ class Repo(object): A list of tuples associating a Commit object with a list of lines that changed within the given commit. 
The Commit objects will be given in order of appearance.""" - data = self.git.blame(rev, '--', file, p=True) + data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False) commits = dict() blames = list() info = None - for line in data.splitlines(False): - parts = self.re_whitespace.split(line, 1) - firstpart = parts[0] + keepends = True + for line in data.splitlines(keepends): + try: + line = line.rstrip().decode(defenc) + except UnicodeDecodeError: + firstpart = '' + is_binary = True + else: + # As we don't have an idea when the binary data ends, as it could contain multiple newlines + # in the process. So we rely on being able to decode to tell us what is is. + # This can absolutely fail even on text files, but even if it does, we should be fine treating it + # as binary instead + parts = self.re_whitespace.split(line, 1) + firstpart = parts[0] + is_binary = False + # end handle decode of line + if self.re_hexsha_only.search(firstpart): # handles # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start @@ -651,10 +665,18 @@ class Repo(object): message=info['summary']) commits[sha] = c # END if commit objects needs initial creation - m = self.re_tab_full_line.search(line) - text, = m.groups() + if not is_binary: + if line and line[0] == '\t': + line = line[1:] + else: + # NOTE: We are actually parsing lines out of binary data, which can lead to the + # binary being split up along the newline separator. We will append this to the blame + # we are currently looking at, even though it should be concatenated with the last line + # we have seen. 
+ pass + # end handle line contents blames[-1][0] = c - blames[-1][1].append(text) + blames[-1][1].append(line) info = {'id': sha} # END if we collected commit info # END distinguish filename,summary,rest diff --git a/git/test/fixtures/blame_binary b/git/test/fixtures/blame_binary Binary files differnew file mode 100644 index 00000000..db78205b --- /dev/null +++ b/git/test/fixtures/blame_binary diff --git a/git/test/test_repo.py b/git/test/test_repo.py index dddfcd90..e2342b3c 100644 --- a/git/test/test_repo.py +++ b/git/test/test_repo.py @@ -33,10 +33,7 @@ from git.repo.fun import touch from git.util import join_path_native from git.exc import BadObject from gitdb.util import bin_to_hex -from git.compat import ( - string_types, - defenc -) +from git.compat import string_types from gitdb.test.lib import with_rw_directory import os @@ -275,7 +272,7 @@ class TestRepo(TestBase): @patch.object(Git, '_call_process') def test_should_display_blame_information(self, git): - git.return_value = fixture('blame').decode(defenc) + git.return_value = fixture('blame') b = self.rorepo.blame('master', 'lib/git.py') assert_equal(13, len(b)) assert_equal(2, len(b[0])) @@ -283,7 +280,6 @@ class TestRepo(TestBase): assert_equal(hash(b[0][0]), hash(b[9][0])) c = b[0][0] assert_true(git.called) - assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True})) assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.hexsha) assert_equal('Tom Preston-Werner', c.author.name) @@ -300,6 +296,11 @@ class TestRepo(TestBase): assert_true(isinstance(tlist[0], string_types)) assert_true(len(tlist) < sum(len(t) for t in tlist)) # test for single-char bug + # BINARY BLAME + git.return_value = fixture('blame_binary') + blames = self.rorepo.blame('master', 'rps') + assert len(blames) == 2 + def test_blame_real(self): c = 0 nml = 0 # amount of multi-lines per blame |