summary | refs | log | tree | commit | diff
path: root/git/diff.py
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2015-01-09 17:14:32 +0100
committer Sebastian Thiel <byronimo@gmail.com> 2015-01-09 17:16:35 +0100
commit c767b5206f1e9c8536110dda4d515ebb9242bbeb (patch)
tree 50b1aca035d522ea7d6a8f003f7fbf18d1e4fe4b /git/diff.py
parent 85a5a8c6a931f8b3a220ed61750d1f9758d0810a (diff)
download gitpython-c767b5206f1e9c8536110dda4d515ebb9242bbeb.tar.gz
Now Diff.__str__ works correctly in all python versions.
Additionally, unicode handling was improved to the point where we deal with all diff(create_patch=True) data as binary. Therefore we don't claim to know all encodings of all text files in the world, even though we still assume that everything git throws at us is utf-8 encoded. Fixes #113
Diffstat (limited to 'git/diff.py')
-rw-r--r-- git/diff.py 36
1 files changed, 29 insertions, 7 deletions
diff --git a/git/diff.py b/git/diff.py
index 595a8247..dfee00e2 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -10,7 +10,10 @@ from gitdb.util import hex_to_bin
from .objects.blob import Blob
from .objects.util import mode_str_to_int
-from git.compat import defenc
+from git.compat import (
+ defenc,
+ PY3
+)
__all__ = ('Diffable', 'DiffIndex', 'Diff')
@@ -194,7 +197,7 @@ class Diff(object):
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
- """, re.VERBOSE | re.MULTILINE)
+ """.encode('ascii'), re.VERBOSE | re.MULTILINE)
# can be used for comparisons
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
@@ -280,11 +283,21 @@ class Diff(object):
msg += '\nfile renamed to %r' % self.rename_to
if self.diff:
msg += '\n---'
- msg += self.diff
+ try:
+ msg += self.diff.decode(defenc)
+ except UnicodeDecodeError:
+ msg += 'OMITTED BINARY DATA'
+ # end handle encoding
msg += '\n---'
# END diff info
- return h + msg
+ # Python2 sillyness: have to assure we convert our likely to be unicode object to a string with the
+ # right encoding. Otherwise it tries to convert it using ascii, which may fail ungracefully
+ res = h + msg
+ if not PY3:
+ res = res.encode(defenc)
+ # end
+ return res
@property
def renamed(self):
@@ -298,7 +311,7 @@ class Diff(object):
:param stream: result of 'git diff' as a stream (supporting file protocol)
:return: git.DiffIndex """
# for now, we have to bake the stream
- text = stream.read().decode(defenc)
+ text = stream.read()
index = DiffIndex()
previous_header = None
for header in cls.re_header.finditer(text):
@@ -317,8 +330,17 @@ class Diff(object):
# We just use the one mode we should have parsed
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
- index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, b_mode,
- new_file, deleted_file, rename_from, rename_to, None))
+ index.append(Diff(repo,
+ a_path and a_path.decode(defenc),
+ b_path and b_path.decode(defenc),
+ a_blob_id and a_blob_id.decode(defenc),
+ b_blob_id and b_blob_id.decode(defenc),
+ a_mode and a_mode.decode(defenc),
+ b_mode and b_mode.decode(defenc),
+ new_file, deleted_file,
+ rename_from and rename_from.decode(defenc),
+ rename_to and rename_to.decode(defenc),
+ None))
previous_header = header
# end for each header we parse