diff options
author | Vincent Driessen <me@nvie.com> | 2016-04-19 23:41:01 +0200 |
---|---|---|
committer | Vincent Driessen <me@nvie.com> | 2016-04-19 23:46:54 +0200 |
commit | 7fbc182e6d4636f67f44e5893dee3dcedfa90e04 (patch) | |
tree | 2e761da1ed007fbbeb1668eaf97c4d09be1d6920 | |
parent | 504870e633eeb5fc1bd7c33b8dde0eb62a5b2d12 (diff) | |
download | gitpython-7fbc182e6d4636f67f44e5893dee3dcedfa90e04.tar.gz |
Fix diff patch parser for paths with unsafe chars
This specifically covers the cases where unsafe chars occur in path
names, and git-diff -p will escape those.
From the git-diff-tree manpage:
> 3. TAB, LF, double quote and backslash characters in pathnames are
> represented as \t, \n, \" and \\, respectively. If there is need
> for such substitution then the whole pathname is put in double
> quotes.
This patch checks whether or not this has happened and will unescape
those paths accordingly.
One thing to note here is that, depending on the position in the patch
format, those paths may be prefixed with an a/ or b/. I've specifically
made sure to never interpret a path that actually starts with a/ or b/
incorrectly.
Example of that subtlety below. Here, the actual file path is
"b/normal". On the diff file that gets encoded as "b/b/normal".
diff --git a/b/normal b/b/normal
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ b/b/normal
@@ -0,0 +1 @@
+dummy content
Here, we prefer the "---" and "+++" lines' values. Note that these
paths start with a/ or b/. The only exception is the value "/dev/null",
which is handled as a special case.
Suppose now the file gets moved "b/moved", the output of that diff would
then be this:
diff --git a/b/normal b/b/moved
similarity index 100%
rename from b/normal
rename to b/moved
We prefer the "rename" lines' values in this case (the "diff" line is
always a last resort). Take note that those lines are not prefixed with
a/ or b/, but the ones in the "diff" line are (just like the ones in
"---" or "+++" lines).
-rw-r--r-- | git/diff.py | 47 | ||||
-rw-r--r-- | git/test/fixtures/diff_patch_unsafe_paths | 75 | ||||
-rw-r--r-- | git/test/test_diff.py | 29 |
3 files changed, 136 insertions, 15 deletions
diff --git a/git/diff.py b/git/diff.py index d4affd30..a7e7411d 100644 --- a/git/diff.py +++ b/git/diff.py @@ -22,6 +22,20 @@ __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE') NULL_TREE = object() +def decode_path(path, has_ab_prefix=True): + if path == b'/dev/null': + return None + + if path.startswith(b'"') and path.endswith(b'"'): + path = path[1:-1].decode('string_escape') + + if has_ab_prefix: + assert path.startswith(b'a/') or path.startswith(b'b/') + path = path[2:] + + return path + + class Diffable(object): """Common interface for all object that can be diffed against another object of compatible type. @@ -196,9 +210,9 @@ class Diff(object): be different to the version in the index or tree, and hence has been modified.""" # precompiled regex - re_header = re.compile(r""" + re_header = re.compile(br""" ^diff[ ]--git - [ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n + [ ](?P<a_path_fallback>"?a/.+?"?)[ ](?P<b_path_fallback>"?b/.+?"?)\n (?:^old[ ]mode[ ](?P<old_mode>\d+)\n ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? (?:^similarity[ ]index[ ]\d+%\n @@ -208,9 +222,9 @@ class Diff(object): (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? - (?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? - (?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? - """.encode('ascii'), re.VERBOSE | re.MULTILINE) + (?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? + (?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? + """, re.VERBOSE | re.MULTILINE) # can be used for comparisons NULL_HEX_SHA = "0" * 40 NULL_BIN_SHA = b"\0" * 20 @@ -320,6 +334,19 @@ class Diff(object): return self.rename_from != self.rename_to @classmethod + def _pick_best_path(cls, path_match, rename_match, path_fallback_match): + if path_match: + return decode_path(path_match) + + if rename_match: + return decode_path(rename_match, has_ab_prefix=False) + + if path_fallback_match: + return decode_path(path_fallback_match) + + return None + + @classmethod def _index_from_patch_format(cls, repo, stream): """Create a new DiffIndex from the given text which must be in patch format :param repo: is the repository we are operating on - it is required @@ -338,14 +365,8 @@ class Diff(object): a_path, b_path = header.groups() new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - a_path = a_path or rename_from or a_path_fallback - b_path = b_path or rename_to or b_path_fallback - - if a_path == b'/dev/null': - a_path = None - - if b_path == b'/dev/null': - b_path = None + a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback) + b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback) # Our only means to find the actual text is to see what has not been matched by our regex, # and then retro-actively assin it to our index diff --git a/git/test/fixtures/diff_patch_unsafe_paths b/git/test/fixtures/diff_patch_unsafe_paths new file mode 100644 index 00000000..14375f79 --- /dev/null +++ b/git/test/fixtures/diff_patch_unsafe_paths @@ -0,0 +1,75 @@ +diff --git a/path/ starting with a space b/path/ starting with a space +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/ starting with a space +@@ -0,0 +1 @@ ++dummy content +diff --git "a/path/\"with-quotes\"" "b/path/\"with-quotes\"" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/\"with-quotes\"" +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/'with-single-quotes' b/path/'with-single-quotes' +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/'with-single-quotes' +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/ending in a space b/path/ending in a space +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/ending in a space +@@ -0,0 +1 @@ ++dummy content +diff --git "a/path/with\ttab" "b/path/with\ttab" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/with\ttab" +@@ -0,0 +1 @@ ++dummy content +diff --git "a/path/with\nnewline" "b/path/with\nnewline" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/with\nnewline" +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/with spaces b/path/with spaces +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/with spaces +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/with-question-mark? b/path/with-question-mark? +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/with-question-mark? +@@ -0,0 +1 @@ ++dummy content +diff --git "a/path/¯\\_(ツ)_|¯" "b/path/¯\\_(ツ)_|¯" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/¯\\_(ツ)_|¯" +@@ -0,0 +1 @@ ++dummy content +diff --git a/a/with spaces b/b/with some spaces +similarity index 100% +rename from a/with spaces +rename to b/with some spaces +diff --git a/a/ending in a space b/b/ending with space +similarity index 100% +rename from a/ending in a space +rename to b/ending with space +diff --git "a/a/\"with-quotes\"" "b/b/\"with even more quotes\"" +similarity index 100% +rename from "a/\"with-quotes\"" +rename to "b/\"with even more quotes\"" diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 0c670f0b..7bca0c2a 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -1,4 +1,4 @@ -#-*-coding:utf-8-*- +# coding: utf-8 # test_diff.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # @@ -145,12 +145,37 @@ class TestDiff(TestBase): assert diff_index[0].new_file assert diff_index[0].diff == fixture('diff_initial') + def test_diff_unsafe_paths(self): + output = StringProcessAdapter(fixture('diff_patch_unsafe_paths')) + res = Diff._index_from_patch_format(None, output.stdout) + + # The "Additions" + self.assertEqual(res[0].b_path, u'path/ starting with a space') + self.assertEqual(res[1].b_path, u'path/"with-quotes"') + self.assertEqual(res[2].b_path, u"path/'with-single-quotes'") + self.assertEqual(res[3].b_path, u'path/ending in a space ') + self.assertEqual(res[4].b_path, u'path/with\ttab') + self.assertEqual(res[5].b_path, u'path/with\nnewline') + self.assertEqual(res[6].b_path, u'path/with spaces') + self.assertEqual(res[7].b_path, u'path/with-question-mark?') + self.assertEqual(res[8].b_path, ur'path/¯\_(ツ)_|¯') + + # The "Moves" + # NOTE: The path prefixes a/ and b/ here are legit! We're actually + # verifying that it's not "a/a/" that shows up, see the fixture data. + self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit! + self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit! + self.assertEqual(res[10].a_path, u'a/ending in a space ') + self.assertEqual(res[10].b_path, u'b/ending with space ') + self.assertEqual(res[11].a_path, u'a/"with-quotes"') + self.assertEqual(res[11].b_path, u'b/"with even more quotes"') + def test_diff_patch_format(self): # test all of the 'old' format diffs for completness - it should at least # be able to deal with it fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only", "diff_new_mode", "diff_numstat", "diff_p", "diff_rename", - "diff_tree_numstat_root") + "diff_tree_numstat_root", "diff_patch_unsafe_paths") for fixture_name in fixtures: diff_proc = StringProcessAdapter(fixture(fixture_name)) |