summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Driessen <me@nvie.com>2016-04-19 23:41:01 +0200
committerVincent Driessen <me@nvie.com>2016-04-19 23:46:54 +0200
commit7fbc182e6d4636f67f44e5893dee3dcedfa90e04 (patch)
tree2e761da1ed007fbbeb1668eaf97c4d09be1d6920
parent504870e633eeb5fc1bd7c33b8dde0eb62a5b2d12 (diff)
downloadgitpython-7fbc182e6d4636f67f44e5893dee3dcedfa90e04.tar.gz
Fix diff patch parser for paths with unsafe chars
This specifically covers the cases where unsafe chars occur in path names, and git-diff -p will escape those. From the git-diff-tree manpage: > 3. TAB, LF, double quote and backslash characters in pathnames are > represented as \t, \n, \" and \\, respectively. If there is need > for such substitution then the whole pathname is put in double > quotes. This patch checks whether or not this has happened and will unescape those paths accordingly. One thing to note here is that, depending on the position in the patch format, those paths may be prefixed with an a/ or b/. I've specifically made sure to never interpret a path that actually starts with a/ or b/ incorrectly. Example of that subtlety below. Here, the actual file path is "b/normal". On the diff file that gets encoded as "b/b/normal". diff --git a/b/normal b/b/normal new file mode 100644 index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 --- /dev/null +++ b/b/normal @@ -0,0 +1 @@ +dummy content Here, we prefer the "---" and "+++" lines' values. Note that these paths start with a/ or b/. The only exception is the value "/dev/null", which is handled as a special case. Suppose now the file gets moved "b/moved", the output of that diff would then be this: diff --git a/b/normal b/b/moved similarity index 100% rename from b/normal rename to b/moved We prefer the "rename" lines' values in this case (the "diff" line is always a last resort). Take note that those lines are not prefixed with a/ or b/, but the ones in the "diff" line are (just like the ones in "---" or "+++" lines).
-rw-r--r--git/diff.py47
-rw-r--r--git/test/fixtures/diff_patch_unsafe_paths75
-rw-r--r--git/test/test_diff.py29
3 files changed, 136 insertions, 15 deletions
diff --git a/git/diff.py b/git/diff.py
index d4affd30..a7e7411d 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -22,6 +22,20 @@ __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE')
NULL_TREE = object()
+def decode_path(path, has_ab_prefix=True):
+ if path == b'/dev/null':
+ return None
+
+ if path.startswith(b'"') and path.endswith(b'"'):
+ path = path[1:-1].decode('string_escape')
+
+ if has_ab_prefix:
+ assert path.startswith(b'a/') or path.startswith(b'b/')
+ path = path[2:]
+
+ return path
+
+
class Diffable(object):
"""Common interface for all object that can be diffed against another object of compatible type.
@@ -196,9 +210,9 @@ class Diff(object):
be different to the version in the index or tree, and hence has been modified."""
# precompiled regex
- re_header = re.compile(r"""
+ re_header = re.compile(br"""
^diff[ ]--git
- [ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n
+ [ ](?P<a_path_fallback>"?a/.+?"?)[ ](?P<b_path_fallback>"?b/.+?"?)\n
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
(?:^similarity[ ]index[ ]\d+%\n
@@ -208,9 +222,9 @@ class Diff(object):
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
- (?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
- (?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
- """.encode('ascii'), re.VERBOSE | re.MULTILINE)
+ (?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
+ (?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
+ """, re.VERBOSE | re.MULTILINE)
# can be used for comparisons
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
@@ -320,6 +334,19 @@ class Diff(object):
return self.rename_from != self.rename_to
@classmethod
+ def _pick_best_path(cls, path_match, rename_match, path_fallback_match):
+ if path_match:
+ return decode_path(path_match)
+
+ if rename_match:
+ return decode_path(rename_match, has_ab_prefix=False)
+
+ if path_fallback_match:
+ return decode_path(path_fallback_match)
+
+ return None
+
+ @classmethod
def _index_from_patch_format(cls, repo, stream):
"""Create a new DiffIndex from the given text which must be in patch format
:param repo: is the repository we are operating on - it is required
@@ -338,14 +365,8 @@ class Diff(object):
a_path, b_path = header.groups()
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
- a_path = a_path or rename_from or a_path_fallback
- b_path = b_path or rename_to or b_path_fallback
-
- if a_path == b'/dev/null':
- a_path = None
-
- if b_path == b'/dev/null':
- b_path = None
+ a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
+ b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)
# Our only means to find the actual text is to see what has not been matched by our regex,
# and then retro-actively assin it to our index
diff --git a/git/test/fixtures/diff_patch_unsafe_paths b/git/test/fixtures/diff_patch_unsafe_paths
new file mode 100644
index 00000000..14375f79
--- /dev/null
+++ b/git/test/fixtures/diff_patch_unsafe_paths
@@ -0,0 +1,75 @@
+diff --git a/path/ starting with a space b/path/ starting with a space
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/ starting with a space
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/\"with-quotes\"" "b/path/\"with-quotes\""
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/\"with-quotes\""
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/'with-single-quotes' b/path/'with-single-quotes'
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/'with-single-quotes'
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/ending in a space b/path/ending in a space
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/ending in a space
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/with\ttab" "b/path/with\ttab"
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/with\ttab"
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/with\nnewline" "b/path/with\nnewline"
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/with\nnewline"
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/with spaces b/path/with spaces
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/with spaces
+@@ -0,0 +1 @@
++dummy content
+diff --git a/path/with-question-mark? b/path/with-question-mark?
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ b/path/with-question-mark?
+@@ -0,0 +1 @@
++dummy content
+diff --git "a/path/¯\\_(ツ)_|¯" "b/path/¯\\_(ツ)_|¯"
+new file mode 100644
+index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
+--- /dev/null
++++ "b/path/¯\\_(ツ)_|¯"
+@@ -0,0 +1 @@
++dummy content
+diff --git a/a/with spaces b/b/with some spaces
+similarity index 100%
+rename from a/with spaces
+rename to b/with some spaces
+diff --git a/a/ending in a space b/b/ending with space
+similarity index 100%
+rename from a/ending in a space
+rename to b/ending with space
+diff --git "a/a/\"with-quotes\"" "b/b/\"with even more quotes\""
+similarity index 100%
+rename from "a/\"with-quotes\""
+rename to "b/\"with even more quotes\""
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 0c670f0b..7bca0c2a 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -1,4 +1,4 @@
-#-*-coding:utf-8-*-
+# coding: utf-8
# test_diff.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
@@ -145,12 +145,37 @@ class TestDiff(TestBase):
assert diff_index[0].new_file
assert diff_index[0].diff == fixture('diff_initial')
+ def test_diff_unsafe_paths(self):
+ output = StringProcessAdapter(fixture('diff_patch_unsafe_paths'))
+ res = Diff._index_from_patch_format(None, output.stdout)
+
+ # The "Additions"
+ self.assertEqual(res[0].b_path, u'path/ starting with a space')
+ self.assertEqual(res[1].b_path, u'path/"with-quotes"')
+ self.assertEqual(res[2].b_path, u"path/'with-single-quotes'")
+ self.assertEqual(res[3].b_path, u'path/ending in a space ')
+ self.assertEqual(res[4].b_path, u'path/with\ttab')
+ self.assertEqual(res[5].b_path, u'path/with\nnewline')
+ self.assertEqual(res[6].b_path, u'path/with spaces')
+ self.assertEqual(res[7].b_path, u'path/with-question-mark?')
+ self.assertEqual(res[8].b_path, ur'path/¯\_(ツ)_|¯')
+
+ # The "Moves"
+ # NOTE: The path prefixes a/ and b/ here are legit! We're actually
+ # verifying that it's not "a/a/" that shows up, see the fixture data.
+ self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit!
+ self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit!
+ self.assertEqual(res[10].a_path, u'a/ending in a space ')
+ self.assertEqual(res[10].b_path, u'b/ending with space ')
+ self.assertEqual(res[11].a_path, u'a/"with-quotes"')
+ self.assertEqual(res[11].b_path, u'b/"with even more quotes"')
+
def test_diff_patch_format(self):
# test all of the 'old' format diffs for completness - it should at least
# be able to deal with it
fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only",
"diff_new_mode", "diff_numstat", "diff_p", "diff_rename",
- "diff_tree_numstat_root")
+ "diff_tree_numstat_root", "diff_patch_unsafe_paths")
for fixture_name in fixtures:
diff_proc = StringProcessAdapter(fixture(fixture_name))