From 200d3c6cb436097eaee7c951a0c9921bfcb75c7f Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 6 Jun 2016 12:13:37 +0200 Subject: Don't choke on (legitimately) invalidly encoded Unicode paths --- git/diff.py | 8 ++++---- git/test/fixtures/diff_patch_unsafe_paths | 7 +++++++ git/test/test_diff.py | 13 +++++++------ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/git/diff.py b/git/diff.py index 9073767e..aeaa67d5 100644 --- a/git/diff.py +++ b/git/diff.py @@ -404,15 +404,15 @@ class Diff(object): a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode)) b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode) index.append(Diff(repo, - a_path and a_path.decode(defenc), - b_path and b_path.decode(defenc), + a_path and a_path.decode(defenc, 'replace'), + b_path and b_path.decode(defenc, 'replace'), a_blob_id and a_blob_id.decode(defenc), b_blob_id and b_blob_id.decode(defenc), a_mode and a_mode.decode(defenc), b_mode and b_mode.decode(defenc), new_file, deleted_file, - rename_from and rename_from.decode(defenc), - rename_to and rename_to.decode(defenc), + rename_from and rename_from.decode(defenc, 'replace'), + rename_to and rename_to.decode(defenc, 'replace'), None)) previous_header = header diff --git a/git/test/fixtures/diff_patch_unsafe_paths b/git/test/fixtures/diff_patch_unsafe_paths index 9ee6b834..1aad6754 100644 --- a/git/test/fixtures/diff_patch_unsafe_paths +++ b/git/test/fixtures/diff_patch_unsafe_paths @@ -68,6 +68,13 @@ index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94 +++ "b/path/\360\237\222\251.txt" @@ -0,0 +1 @@ +dummy content +diff --git "a/path/\200-invalid-unicode-path.txt" "b/path/\200-invalid-unicode-path.txt" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/\200-invalid-unicode-path.txt" +@@ -0,0 +1 @@ ++dummy content diff --git a/a/with spaces b/b/with some spaces similarity index 100% rename from a/with spaces diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 8966351a..8d189b12 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -162,16 +162,17 @@ class TestDiff(TestBase): self.assertEqual(res[7].b_path, u'path/with-question-mark?') self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯') self.assertEqual(res[9].b_path, u'path/💩.txt') + self.assertEqual(res[10].b_path, u'path/�-invalid-unicode-path.txt') # The "Moves" # NOTE: The path prefixes a/ and b/ here are legit! We're actually # verifying that it's not "a/a/" that shows up, see the fixture data. - self.assertEqual(res[10].a_path, u'a/with spaces') # NOTE: path a/ here legit! - self.assertEqual(res[10].b_path, u'b/with some spaces') # NOTE: path b/ here legit! - self.assertEqual(res[11].a_path, u'a/ending in a space ') - self.assertEqual(res[11].b_path, u'b/ending with space ') - self.assertEqual(res[12].a_path, u'a/"with-quotes"') - self.assertEqual(res[12].b_path, u'b/"with even more quotes"') + self.assertEqual(res[11].a_path, u'a/with spaces') # NOTE: path a/ here legit! + self.assertEqual(res[11].b_path, u'b/with some spaces') # NOTE: path b/ here legit! + self.assertEqual(res[12].a_path, u'a/ending in a space ') + self.assertEqual(res[12].b_path, u'b/ending with space ') + self.assertEqual(res[13].a_path, u'a/"with-quotes"') + self.assertEqual(res[13].b_path, u'b/"with even more quotes"') def test_diff_patch_format(self): # test all of the 'old' format diffs for completness - it should at least -- cgit v1.2.1