summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2015-01-09 17:14:32 +0100
committer: Sebastian Thiel <byronimo@gmail.com> 2015-01-09 17:16:35 +0100
commit: c767b5206f1e9c8536110dda4d515ebb9242bbeb (patch)
tree: 50b1aca035d522ea7d6a8f003f7fbf18d1e4fe4b
parent: 85a5a8c6a931f8b3a220ed61750d1f9758d0810a (diff)
download: gitpython-c767b5206f1e9c8536110dda4d515ebb9242bbeb.tar.gz
Now Diff.__str__ works correctly in all python versions.
Additionally, Unicode handling was improved to the point where we deal with all diff(create_patch=True) data as binary. Therefore we don't claim to know all encodings of all text files in the world, even though we still assume that everything git throws at us is UTF-8 encoded. Fixes #113
-rw-r--r-- git/diff.py 36
-rw-r--r-- git/test/test_diff.py 4
2 files changed, 31 insertions, 9 deletions
diff --git a/git/diff.py b/git/diff.py
index 595a8247..dfee00e2 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -10,7 +10,10 @@ from gitdb.util import hex_to_bin
from .objects.blob import Blob
from .objects.util import mode_str_to_int
-from git.compat import defenc
+from git.compat import (
+ defenc,
+ PY3
+)
__all__ = ('Diffable', 'DiffIndex', 'Diff')
@@ -194,7 +197,7 @@ class Diff(object):
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
- """, re.VERBOSE | re.MULTILINE)
+ """.encode('ascii'), re.VERBOSE | re.MULTILINE)
# can be used for comparisons
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
@@ -280,11 +283,21 @@ class Diff(object):
msg += '\nfile renamed to %r' % self.rename_to
if self.diff:
msg += '\n---'
- msg += self.diff
+ try:
+ msg += self.diff.decode(defenc)
+ except UnicodeDecodeError:
+ msg += 'OMITTED BINARY DATA'
+ # end handle encoding
msg += '\n---'
# END diff info
- return h + msg
+ # Python2 sillyness: have to assure we convert our likely to be unicode object to a string with the
+ # right encoding. Otherwise it tries to convert it using ascii, which may fail ungracefully
+ res = h + msg
+ if not PY3:
+ res = res.encode(defenc)
+ # end
+ return res
@property
def renamed(self):
@@ -298,7 +311,7 @@ class Diff(object):
:param stream: result of 'git diff' as a stream (supporting file protocol)
:return: git.DiffIndex """
# for now, we have to bake the stream
- text = stream.read().decode(defenc)
+ text = stream.read()
index = DiffIndex()
previous_header = None
for header in cls.re_header.finditer(text):
@@ -317,8 +330,17 @@ class Diff(object):
# We just use the one mode we should have parsed
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
- index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, b_mode,
- new_file, deleted_file, rename_from, rename_to, None))
+ index.append(Diff(repo,
+ a_path and a_path.decode(defenc),
+ b_path and b_path.decode(defenc),
+ a_blob_id and a_blob_id.decode(defenc),
+ b_blob_id and b_blob_id.decode(defenc),
+ a_mode and a_mode.decode(defenc),
+ b_mode and b_mode.decode(defenc),
+ new_file, deleted_file,
+ rename_from and rename_from.decode(defenc),
+ rename_to and rename_to.decode(defenc),
+ None))
previous_header = header
# end for each header we parse
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 498f0586..42972603 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -72,7 +72,7 @@ class TestDiff(TestBase):
assert len(res) == 1
assert len(list(res.iter_change_type('M'))) == 1
if res[0].diff:
- assert res[0].diff == "Binary files a/rps and b/rps differ\n", "in patch mode, we get a diff text"
+ assert res[0].diff == b"Binary files a/rps and b/rps differ\n", "in patch mode, we get a diff text"
assert str(res[0]), "This call should just work"
# end for each method to test
@@ -86,7 +86,7 @@ class TestDiff(TestBase):
# end for each diff
dr = res[3]
- assert dr.diff.endswith("+Binary files a/rps and b/rps differ\n")
+ assert dr.diff.endswith(b"+Binary files a/rps and b/rps differ\n")
def test_diff_patch_format(self):
# test all of the 'old' format diffs for completness - it should at least