summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2016-06-20 06:51:08 +0200
committerSebastian Thiel <byronimo@gmail.com>2016-06-20 06:51:08 +0200
commitf128ae9a37036614c1b5d44e391ba070dd4326d1 (patch)
tree5332b1ef2152d1df93d1a0d35143d86a0d229a06
parent07a8f73dca7ec7c2aeb6aa47aaf421d8d22423ad (diff)
parente9405ac82af3a804dba1f9797bdb34815e1d7a18 (diff)
downloadgitpython-fix-non-ascii-chars-in-status-lines.tar.gz
Merge branch 'master' into fix-non-ascii-chars-in-status-linesfix-non-ascii-chars-in-status-lines
-rw-r--r--doc/source/changes.rst3
-rw-r--r--git/compat.py2
-rw-r--r--git/diff.py58
-rw-r--r--git/test/test_diff.py6
4 files changed, 51 insertions, 18 deletions
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index 9efbc0f0..428fa2b4 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -7,6 +7,9 @@ Changelog
* Fix: remote output parser now correctly matches refs with non-ASCII
chars in them
+* API: Diffs now have `a_rawpath`, `b_rawpath`, `raw_rename_from`,
+ `raw_rename_to` properties, which are the raw-bytes equivalents of their
+ unicode path counterparts.
* Fix: TypeError about passing keyword argument to string decode() on
Python 2.6.
* Feature: `setUrl API on Remotes <https://github.com/gitpython-developers/GitPython/pull/446#issuecomment-224670539>`_
diff --git a/git/compat.py b/git/compat.py
index 5b46255c..b3572474 100644
--- a/git/compat.py
+++ b/git/compat.py
@@ -35,6 +35,7 @@ if PY3:
return d.values()
range = xrange
unicode = str
+ binary_type = bytes
else:
FileType = file
# usually, this is just ascii, which might not enough for our encoding needs
@@ -44,6 +45,7 @@ else:
byte_ord = ord
bchr = chr
unicode = unicode
+ binary_type = str
range = xrange
def mviter(d):
return d.itervalues()
diff --git a/git/diff.py b/git/diff.py
index aeaa67d5..06193920 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -7,6 +7,7 @@ import re
from gitdb.util import hex_to_bin
+from .compat import binary_type
from .objects.blob import Blob
from .objects.util import mode_str_to_int
@@ -245,18 +246,20 @@ class Diff(object):
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
- __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_path", "b_path",
- "new_file", "deleted_file", "rename_from", "rename_to", "diff")
+ __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath",
+ "new_file", "deleted_file", "raw_rename_from", "raw_rename_to", "diff")
- def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
- b_mode, new_file, deleted_file, rename_from,
- rename_to, diff):
+ def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
+ b_mode, new_file, deleted_file, raw_rename_from,
+ raw_rename_to, diff):
self.a_mode = a_mode
self.b_mode = b_mode
- self.a_path = a_path
- self.b_path = b_path
+ assert a_rawpath is None or isinstance(a_rawpath, binary_type)
+ assert b_rawpath is None or isinstance(b_rawpath, binary_type)
+ self.a_rawpath = a_rawpath
+ self.b_rawpath = b_rawpath
if self.a_mode:
self.a_mode = mode_str_to_int(self.a_mode)
@@ -266,19 +269,21 @@ class Diff(object):
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
self.a_blob = None
else:
- self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
+ self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)
if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
self.b_blob = None
else:
- self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
+ self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)
self.new_file = new_file
self.deleted_file = deleted_file
# be clear and use None instead of empty strings
- self.rename_from = rename_from or None
- self.rename_to = rename_to or None
+ assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
+ assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
+ self.raw_rename_from = raw_rename_from or None
+ self.raw_rename_to = raw_rename_to or None
self.diff = diff
@@ -345,6 +350,22 @@ class Diff(object):
return res
@property
+ def a_path(self):
+ return self.a_rawpath.decode(defenc, 'replace') if self.a_rawpath else None
+
+ @property
+ def b_path(self):
+ return self.b_rawpath.decode(defenc, 'replace') if self.b_rawpath else None
+
+ @property
+ def rename_from(self):
+ return self.raw_rename_from.decode(defenc, 'replace') if self.raw_rename_from else None
+
+ @property
+ def rename_to(self):
+ return self.raw_rename_to.decode(defenc, 'replace') if self.raw_rename_to else None
+
+ @property
def renamed(self):
""":returns: True if the blob of our diff has been renamed
:note: This property is deprecated, please use ``renamed_file`` instead.
@@ -388,6 +409,7 @@ class Diff(object):
new_file_mode, deleted_file_mode, \
a_blob_id, b_blob_id, b_mode, \
a_path, b_path = header.groups()
+
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
@@ -404,15 +426,15 @@ class Diff(object):
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
index.append(Diff(repo,
- a_path and a_path.decode(defenc, 'replace'),
- b_path and b_path.decode(defenc, 'replace'),
+ a_path,
+ b_path,
a_blob_id and a_blob_id.decode(defenc),
b_blob_id and b_blob_id.decode(defenc),
a_mode and a_mode.decode(defenc),
b_mode and b_mode.decode(defenc),
new_file, deleted_file,
- rename_from and rename_from.decode(defenc, 'replace'),
- rename_to and rename_to.decode(defenc, 'replace'),
+ rename_from,
+ rename_to,
None))
previous_header = header
@@ -438,8 +460,8 @@ class Diff(object):
meta, _, path = line[1:].partition('\t')
old_mode, new_mode, a_blob_id, b_blob_id, change_type = meta.split(None, 4)
path = path.strip()
- a_path = path
- b_path = path
+ a_path = path.encode(defenc)
+ b_path = path.encode(defenc)
deleted_file = False
new_file = False
rename_from = None
@@ -455,6 +477,8 @@ class Diff(object):
new_file = True
elif change_type[0] == 'R': # parses RXXX, where XXX is a confidence value
a_path, b_path = path.split('\t', 1)
+ a_path = a_path.encode(defenc)
+ b_path = b_path.encode(defenc)
rename_from, rename_to = a_path, b_path
# END add/remove handling
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 8d189b12..ba0d2d13 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -90,6 +90,8 @@ class TestDiff(TestBase):
assert_true(diff.renamed)
assert_equal(diff.rename_from, u'Jérôme')
assert_equal(diff.rename_to, u'müller')
+ assert_equal(diff.raw_rename_from, b'J\xc3\xa9r\xc3\xb4me')
+ assert_equal(diff.raw_rename_to, b'm\xc3\xbcller')
assert isinstance(str(diff), str)
output = StringProcessAdapter(fixture('diff_rename_raw'))
@@ -129,7 +131,7 @@ class TestDiff(TestBase):
output = StringProcessAdapter(fixture('diff_index_raw'))
res = Diff._index_from_raw_format(None, output.stdout)
assert res[0].deleted_file
- assert res[0].b_path == ''
+ assert res[0].b_path is None
def test_diff_initial_commit(self):
initial_commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781')
@@ -162,7 +164,9 @@ class TestDiff(TestBase):
self.assertEqual(res[7].b_path, u'path/with-question-mark?')
self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
self.assertEqual(res[9].b_path, u'path/💩.txt')
+ self.assertEqual(res[9].b_rawpath, b'path/\xf0\x9f\x92\xa9.txt')
self.assertEqual(res[10].b_path, u'path/�-invalid-unicode-path.txt')
+ self.assertEqual(res[10].b_rawpath, b'path/\x80-invalid-unicode-path.txt')
# The "Moves"
# NOTE: The path prefixes a/ and b/ here are legit! We're actually