summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Driessen <me@nvie.com>2016-04-19 21:46:59 +0200
committerVincent Driessen <me@nvie.com>2016-04-19 21:46:59 +0200
commit8bbf1a3b801fb4e00c10f631faa87114dcd0462f (patch)
treea231962fc64fb5b255d87ecb409bfc1f48dfe8a5
parent0d7a40f603412be7e1046b500057b08558d9d250 (diff)
parent1445b59bb41c4b1a94b7cb0ec6864c98de63814b (diff)
downloadgitpython-8bbf1a3b801fb4e00c10f631faa87114dcd0462f.tar.gz
Merge pull request #412 from nvie/fix-diff-patch-parsing
Fix diff patch parsing
-rw-r--r--doc/source/changes.rst20
-rw-r--r--git/diff.py35
-rw-r--r--git/test/fixtures/diff_file_with_spaces7
-rw-r--r--git/test/fixtures/diff_initial2
-rw-r--r--git/test/test_diff.py13
5 files changed, 58 insertions, 19 deletions
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index 4fdf39ca..734c479c 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -5,9 +5,23 @@ Changelog
1.0.3 - Fixes
=============
-* `Commit.diff()` now supports diffing the root commit via `Commit.diff(NULL_TREE)`.
-* `Repo.blame()` now respects `incremental=True`, supporting incremental blames. Incremental blames are slightly faster since they don't include the file's contents in them.
-* IMPORTANT: This release drops support for python 2.6, which is officially deprecated by the python maintainers.
+* `Commit.diff()` now supports diffing the root commit via
+ `Commit.diff(NULL_TREE)`.
+* `Repo.blame()` now respects `incremental=True`, supporting incremental
+ blames. Incremental blames are slightly faster since they don't include
+ the file's contents in them.
+* Fix: `Diff` objects created with patch output will now have their
+ `a_path` and `b_path` properties parsed out correctly. Previously, some
+ values may have been populated incorrectly when a file was added or
+ deleted.
+* IMPORTANT: This release drops support for python 2.6, which is
+ officially deprecated by the python maintainers.
+* CRITICAL: `Diff` objects created with patch output will now not carry
+ the --- and +++ header lines anymore. All diffs now start with the
+ @@ header line directly. Users that rely on the old behaviour can now
+ (reliably) read this information from the a_path and b_path properties
+ without having to parse these lines manually.
+
1.0.2 - Fixes
=============
diff --git a/git/diff.py b/git/diff.py
index de3aa1e8..d4affd30 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -198,16 +198,18 @@ class Diff(object):
# precompiled regex
re_header = re.compile(r"""
^diff[ ]--git
- [ ](?:a/)?(?P<a_path>.+?)[ ](?:b/)?(?P<b_path>.+?)\n
- (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
- ^rename[ ]from[ ](?P<rename_from>\S+)\n
- ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
+ [ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
+ (?:^similarity[ ]index[ ]\d+%\n
+ ^rename[ ]from[ ](?P<rename_from>.*)\n
+ ^rename[ ]to[ ](?P<rename_to>.*)(?:\n|$))?
(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
+ (?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
+ (?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
""".encode('ascii'), re.VERBOSE | re.MULTILINE)
# can be used for comparisons
NULL_HEX_SHA = "0" * 40
@@ -231,15 +233,14 @@ class Diff(object):
if self.b_mode:
self.b_mode = mode_str_to_int(self.b_mode)
- if a_blob_id is None:
+ if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
self.a_blob = None
else:
- assert self.a_mode is not None
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
- if b_blob_id is None:
+
+ if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
self.b_blob = None
else:
- assert self.b_mode is not None
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
self.new_file = new_file
@@ -329,11 +330,23 @@ class Diff(object):
index = DiffIndex()
previous_header = None
for header in cls.re_header.finditer(text):
- a_path, b_path, similarity_index, rename_from, rename_to, \
- old_mode, new_mode, new_file_mode, deleted_file_mode, \
- a_blob_id, b_blob_id, b_mode = header.groups()
+ a_path_fallback, b_path_fallback, \
+ old_mode, new_mode, \
+ rename_from, rename_to, \
+ new_file_mode, deleted_file_mode, \
+ a_blob_id, b_blob_id, b_mode, \
+ a_path, b_path = header.groups()
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
+ a_path = a_path or rename_from or a_path_fallback
+ b_path = b_path or rename_to or b_path_fallback
+
+ if a_path == b'/dev/null':
+ a_path = None
+
+ if b_path == b'/dev/null':
+ b_path = None
+
# Our only means to find the actual text is to see what has not been matched by our regex,
# and then retro-actively assin it to our index
if previous_header is not None:
diff --git a/git/test/fixtures/diff_file_with_spaces b/git/test/fixtures/diff_file_with_spaces
new file mode 100644
index 00000000..a9f0b06c
--- /dev/null
+++ b/git/test/fixtures/diff_file_with_spaces
@@ -0,0 +1,7 @@
+diff --git a/file with spaces b/file with spaces
+new file mode 100644
+index 0000000000000000000000000000000000000000..75c620d7b0d3b0100415421a97f553c979d75174
+--- /dev/null
++++ b/file with spaces
+@@ -0,0 +1 @@
++ohai
diff --git a/git/test/fixtures/diff_initial b/git/test/fixtures/diff_initial
index 6037c677..648d7043 100644
--- a/git/test/fixtures/diff_initial
+++ b/git/test/fixtures/diff_initial
@@ -1,5 +1,3 @@
---- /dev/null
-+++ b/CHANGES
@@ -0,0 +1,7 @@
+=======
+CHANGES
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 56e395fd..0c670f0b 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -76,7 +76,7 @@ class TestDiff(TestBase):
self._assert_diff_format(diffs)
assert_equal(1, len(diffs))
- assert_equal(10, len(diffs[0].diff.splitlines()))
+ assert_equal(8, len(diffs[0].diff.splitlines()))
def test_diff_with_rename(self):
output = StringProcessAdapter(fixture('diff_rename'))
@@ -116,7 +116,7 @@ class TestDiff(TestBase):
res = Diff._index_from_patch_format(None, output.stdout)
assert len(res) == 6
for dr in res:
- assert dr.diff
+ assert dr.diff.startswith(b'@@')
assert str(dr), "Diff to string conversion should be possible"
# end for each diff
@@ -140,7 +140,8 @@ class TestDiff(TestBase):
# ...and with creating a patch
diff_index = initial_commit.diff(NULL_TREE, create_patch=True)
- assert diff_index[0].b_path == 'CHANGES'
+ assert diff_index[0].a_path is None, repr(diff_index[0].a_path)
+ assert diff_index[0].b_path == 'CHANGES', repr(diff_index[0].b_path)
assert diff_index[0].new_file
assert diff_index[0].diff == fixture('diff_initial')
@@ -156,6 +157,12 @@ class TestDiff(TestBase):
Diff._index_from_patch_format(self.rorepo, diff_proc.stdout)
# END for each fixture
+ def test_diff_with_spaces(self):
+ data = StringProcessAdapter(fixture('diff_file_with_spaces'))
+ diff_index = Diff._index_from_patch_format(self.rorepo, data.stdout)
+ assert diff_index[0].a_path is None, repr(diff_index[0].a_path)
+ assert diff_index[0].b_path == u'file with spaces', repr(diff_index[0].b_path)
+
def test_diff_interface(self):
# test a few variations of the main diff routine
assertion_map = dict()