summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2016-04-20 08:57:47 +0200
committer Sebastian Thiel <byronimo@gmail.com> 2016-04-20 08:57:47 +0200
commit fcb6e8832a94776d670095935a7da579a111c028 (patch)
tree 8a9ee5c69380d4694259dfb9b9feeddcb7bc04e8
parent 08938d6cee0dc4b45744702e7d0e7f74f2713807 (diff)
parent 19099f9ce7e8d6cb1f5cafae318859be8c082ca2 (diff)
download gitpython-fcb6e8832a94776d670095935a7da579a111c028.tar.gz
Merge pull request #415 from nvie/fix-for-unicode-paths
Fix diff patch parser for paths with unsafe chars
-rw-r--r-- git/diff.py 50
-rw-r--r-- git/test/fixtures/diff_patch_unsafe_paths 75
-rw-r--r-- git/test/test_diff.py 29
3 files changed, 139 insertions, 15 deletions
diff --git a/git/diff.py b/git/diff.py
index d4affd30..76426940 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -22,6 +22,23 @@ __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE')
NULL_TREE = object()
+def decode_path(path, has_ab_prefix=True):
+ if path == b'/dev/null':
+ return None
+
+ if path.startswith(b'"') and path.endswith(b'"'):
+ path = (path[1:-1].replace(b'\\n', b'\n')
+ .replace(b'\\t', b'\t')
+ .replace(b'\\"', b'"')
+ .replace(b'\\\\', b'\\'))
+
+ if has_ab_prefix:
+ assert path.startswith(b'a/') or path.startswith(b'b/')
+ path = path[2:]
+
+ return path
+
+
class Diffable(object):
"""Common interface for all objects that can be diffed against another object of compatible type.
@@ -196,9 +213,9 @@ class Diff(object):
be different to the version in the index or tree, and hence has been modified."""
# precompiled regex
- re_header = re.compile(r"""
+ re_header = re.compile(br"""
^diff[ ]--git
- [ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n
+ [ ](?P<a_path_fallback>"?a/.+?"?)[ ](?P<b_path_fallback>"?b/.+?"?)\n
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
(?:^similarity[ ]index[ ]\d+%\n
@@ -208,9 +225,9 @@ class Diff(object):
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
- (?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
- (?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
- """.encode('ascii'), re.VERBOSE | re.MULTILINE)
+ (?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
+ (?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
+ """, re.VERBOSE | re.MULTILINE)
# can be used for comparisons
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
@@ -320,6 +337,19 @@ class Diff(object):
return self.rename_from != self.rename_to
@classmethod
+ def _pick_best_path(cls, path_match, rename_match, path_fallback_match):
+ if path_match:
+ return decode_path(path_match)
+
+ if rename_match:
+ return decode_path(rename_match, has_ab_prefix=False)
+
+ if path_fallback_match:
+ return decode_path(path_fallback_match)
+
+ return None
+
+ @classmethod
def _index_from_patch_format(cls, repo, stream):
"""Create a new DiffIndex from the given text which must be in patch format
:param repo: is the repository we are operating on - it is required
@@ -338,14 +368,8 @@ class Diff(object):
a_path, b_path = header.groups()
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
- a_path = a_path or rename_from or a_path_fallback
- b_path = b_path or rename_to or b_path_fallback
-
- if a_path == b'/dev/null':
- a_path = None
-
- if b_path == b'/dev/null':
- b_path = None
+ a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
+ b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)
# Our only means to find the actual text is to see what has not been matched by our regex,
# and then retroactively assign it to our index
diff --git a/git/test/fixtures/diff_patch_unsafe_paths b/git/test/fixtures/diff_patch_unsafe_paths
new file mode 100644
index 00000000..14375f79
--- /dev/null
+++ b/git/test/fixtures/diff_patch_unsafe_paths
@@ -0,0 +1,75 @@
+diff --git a/path/ starting with a space b/path/ starting with a space
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/ starting with a space
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/\"with-quotes\"" "b/path/\"with-quotes\""
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/\"with-quotes\""
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/'with-single-quotes' b/path/'with-single-quotes'
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/'with-single-quotes'
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/ending in a space b/path/ending in a space
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/ending in a space
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/with\ttab" "b/path/with\ttab"
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/with\ttab"
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/with\nnewline" "b/path/with\nnewline"
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/with\nnewline"
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/with spaces b/path/with spaces
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/with spaces
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/with-question-mark? b/path/with-question-mark?
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/with-question-mark?
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/¯\\_(ツ)_|¯" "b/path/¯\\_(ツ)_|¯"
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/¯\\_(ツ)_|¯"
+@@ -0,0 +1 @@
++dummy content
+diff --git a/a/with spaces b/b/with some spaces
+similarity index 100%
+rename from a/with spaces
+rename to b/with some spaces
+diff --git a/a/ending in a space b/b/ending with space
+similarity index 100%
+rename from a/ending in a space
+rename to b/ending with space
+diff --git "a/a/\"with-quotes\"" "b/b/\"with even more quotes\""
+similarity index 100%
+rename from "a/\"with-quotes\""
+rename to "b/\"with even more quotes\""
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 0c670f0b..858b3994 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -1,4 +1,4 @@
-#-*-coding:utf-8-*-
+# coding: utf-8
# test_diff.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
@@ -145,12 +145,37 @@ class TestDiff(TestBase):
assert diff_index[0].new_file
assert diff_index[0].diff == fixture('diff_initial')
+ def test_diff_unsafe_paths(self):
+ output = StringProcessAdapter(fixture('diff_patch_unsafe_paths'))
+ res = Diff._index_from_patch_format(None, output.stdout)
+
+ # The "Additions"
+ self.assertEqual(res[0].b_path, u'path/ starting with a space')
+ self.assertEqual(res[1].b_path, u'path/"with-quotes"')
+ self.assertEqual(res[2].b_path, u"path/'with-single-quotes'")
+ self.assertEqual(res[3].b_path, u'path/ending in a space ')
+ self.assertEqual(res[4].b_path, u'path/with\ttab')
+ self.assertEqual(res[5].b_path, u'path/with\nnewline')
+ self.assertEqual(res[6].b_path, u'path/with spaces')
+ self.assertEqual(res[7].b_path, u'path/with-question-mark?')
+ self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
+
+ # The "Moves"
+ # NOTE: The path prefixes a/ and b/ here are legit! We're actually
+ # verifying that it's not "a/a/" that shows up, see the fixture data.
+ self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit!
+ self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit!
+ self.assertEqual(res[10].a_path, u'a/ending in a space ')
+ self.assertEqual(res[10].b_path, u'b/ending with space ')
+ self.assertEqual(res[11].a_path, u'a/"with-quotes"')
+ self.assertEqual(res[11].b_path, u'b/"with even more quotes"')
+
def test_diff_patch_format(self):
# test all of the 'old' format diffs for completeness - it should at least
# be able to deal with it
fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only",
"diff_new_mode", "diff_numstat", "diff_p", "diff_rename",
- "diff_tree_numstat_root")
+ "diff_tree_numstat_root", "diff_patch_unsafe_paths")
for fixture_name in fixtures:
diff_proc = StringProcessAdapter(fixture(fixture_name))