summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Driessen <me@nvie.com>2016-06-14 22:44:11 +0200
committerVincent Driessen <me@nvie.com>2016-06-14 23:09:22 +0200
commit3ee291c469fc7ea6065ed22f344ed3f2792aa2ca (patch)
tree2b0b2e213c34fb61a0eb91f358685bb40101b1b0
parent105a8c0fb3fe61b77956c8ebd3216738c78a3dff (diff)
downloadgitpython-keep-raw-bytes-on-diffs.tar.gz
Store raw path bytes in Diff instanceskeep-raw-bytes-on-diffs
Previously, the following fields on Diff instances were assumed to be passed in as unicode strings: - `a_path` - `b_path` - `rename_from` - `rename_to` However, since Git natively records paths as bytes, these may potentially not have a valid unicode representation. This patch changes the Diff instance to instead take the following equivalent fields that should be raw bytes instead: - `a_rawpath` - `b_rawpath` - `raw_rename_from` - `raw_rename_to` NOTE ON BACKWARD COMPATIBILITY: The original `a_path`, `b_path`, etc. fields are still available as properties (rather than slots). These properties now dynamically decode the raw bytes into a unicode string (performing the potentially destructive operation of replacing invalid unicode chars by "�"'s). This means that all code using Diffs should remain backward compatible. The only exception is when people would manually construct Diff instances by calling the constructor directly, in which case they should now pass in bytes rather than unicode strings. See also the discussion on https://github.com/gitpython-developers/GitPython/pull/467
-rw-r--r--doc/source/changes.rst3
-rw-r--r--git/compat.py2
-rw-r--r--git/diff.py58
-rw-r--r--git/test/test_diff.py6
4 files changed, 51 insertions, 18 deletions
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index 3f5b8c50..492d1055 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -5,6 +5,9 @@ Changelog
2.0.6 - Fixes and Features
==========================
+* API: Diffs now have `a_rawpath`, `b_rawpath`, `raw_rename_from`,
+ `raw_rename_to` properties, which are the raw-bytes equivalents of their
+ unicode path counterparts.
* Fix: TypeError about passing keyword argument to string decode() on
Python 2.6.
* Feature: `setUrl API on Remotes <https://github.com/gitpython-developers/GitPython/pull/446#issuecomment-224670539>`_
diff --git a/git/compat.py b/git/compat.py
index 5b46255c..b3572474 100644
--- a/git/compat.py
+++ b/git/compat.py
@@ -35,6 +35,7 @@ if PY3:
return d.values()
range = xrange
unicode = str
+ binary_type = bytes
else:
FileType = file
# usually, this is just ascii, which might not enough for our encoding needs
@@ -44,6 +45,7 @@ else:
byte_ord = ord
bchr = chr
unicode = unicode
+ binary_type = str
range = xrange
def mviter(d):
return d.itervalues()
diff --git a/git/diff.py b/git/diff.py
index aeaa67d5..06193920 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -7,6 +7,7 @@ import re
from gitdb.util import hex_to_bin
+from .compat import binary_type
from .objects.blob import Blob
from .objects.util import mode_str_to_int
@@ -245,18 +246,20 @@ class Diff(object):
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
- __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_path", "b_path",
- "new_file", "deleted_file", "rename_from", "rename_to", "diff")
+ __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath",
+ "new_file", "deleted_file", "raw_rename_from", "raw_rename_to", "diff")
- def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
- b_mode, new_file, deleted_file, rename_from,
- rename_to, diff):
+ def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
+ b_mode, new_file, deleted_file, raw_rename_from,
+ raw_rename_to, diff):
self.a_mode = a_mode
self.b_mode = b_mode
- self.a_path = a_path
- self.b_path = b_path
+ assert a_rawpath is None or isinstance(a_rawpath, binary_type)
+ assert b_rawpath is None or isinstance(b_rawpath, binary_type)
+ self.a_rawpath = a_rawpath
+ self.b_rawpath = b_rawpath
if self.a_mode:
self.a_mode = mode_str_to_int(self.a_mode)
@@ -266,19 +269,21 @@ class Diff(object):
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
self.a_blob = None
else:
- self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
+ self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)
if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
self.b_blob = None
else:
- self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
+ self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)
self.new_file = new_file
self.deleted_file = deleted_file
# be clear and use None instead of empty strings
- self.rename_from = rename_from or None
- self.rename_to = rename_to or None
+ assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
+ assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
+ self.raw_rename_from = raw_rename_from or None
+ self.raw_rename_to = raw_rename_to or None
self.diff = diff
@@ -345,6 +350,22 @@ class Diff(object):
return res
@property
+ def a_path(self):
+ return self.a_rawpath.decode(defenc, 'replace') if self.a_rawpath else None
+
+ @property
+ def b_path(self):
+ return self.b_rawpath.decode(defenc, 'replace') if self.b_rawpath else None
+
+ @property
+ def rename_from(self):
+ return self.raw_rename_from.decode(defenc, 'replace') if self.raw_rename_from else None
+
+ @property
+ def rename_to(self):
+ return self.raw_rename_to.decode(defenc, 'replace') if self.raw_rename_to else None
+
+ @property
def renamed(self):
""":returns: True if the blob of our diff has been renamed
:note: This property is deprecated, please use ``renamed_file`` instead.
@@ -388,6 +409,7 @@ class Diff(object):
new_file_mode, deleted_file_mode, \
a_blob_id, b_blob_id, b_mode, \
a_path, b_path = header.groups()
+
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
@@ -404,15 +426,15 @@ class Diff(object):
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
index.append(Diff(repo,
- a_path and a_path.decode(defenc, 'replace'),
- b_path and b_path.decode(defenc, 'replace'),
+ a_path,
+ b_path,
a_blob_id and a_blob_id.decode(defenc),
b_blob_id and b_blob_id.decode(defenc),
a_mode and a_mode.decode(defenc),
b_mode and b_mode.decode(defenc),
new_file, deleted_file,
- rename_from and rename_from.decode(defenc, 'replace'),
- rename_to and rename_to.decode(defenc, 'replace'),
+ rename_from,
+ rename_to,
None))
previous_header = header
@@ -438,8 +460,8 @@ class Diff(object):
meta, _, path = line[1:].partition('\t')
old_mode, new_mode, a_blob_id, b_blob_id, change_type = meta.split(None, 4)
path = path.strip()
- a_path = path
- b_path = path
+ a_path = path.encode(defenc)
+ b_path = path.encode(defenc)
deleted_file = False
new_file = False
rename_from = None
@@ -455,6 +477,8 @@ class Diff(object):
new_file = True
elif change_type[0] == 'R': # parses RXXX, where XXX is a confidence value
a_path, b_path = path.split('\t', 1)
+ a_path = a_path.encode(defenc)
+ b_path = b_path.encode(defenc)
rename_from, rename_to = a_path, b_path
# END add/remove handling
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 8d189b12..ba0d2d13 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -90,6 +90,8 @@ class TestDiff(TestBase):
assert_true(diff.renamed)
assert_equal(diff.rename_from, u'Jérôme')
assert_equal(diff.rename_to, u'müller')
+ assert_equal(diff.raw_rename_from, b'J\xc3\xa9r\xc3\xb4me')
+ assert_equal(diff.raw_rename_to, b'm\xc3\xbcller')
assert isinstance(str(diff), str)
output = StringProcessAdapter(fixture('diff_rename_raw'))
@@ -129,7 +131,7 @@ class TestDiff(TestBase):
output = StringProcessAdapter(fixture('diff_index_raw'))
res = Diff._index_from_raw_format(None, output.stdout)
assert res[0].deleted_file
- assert res[0].b_path == ''
+ assert res[0].b_path is None
def test_diff_initial_commit(self):
initial_commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781')
@@ -162,7 +164,9 @@ class TestDiff(TestBase):
self.assertEqual(res[7].b_path, u'path/with-question-mark?')
self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
self.assertEqual(res[9].b_path, u'path/💩.txt')
+ self.assertEqual(res[9].b_rawpath, b'path/\xf0\x9f\x92\xa9.txt')
self.assertEqual(res[10].b_path, u'path/�-invalid-unicode-path.txt')
+ self.assertEqual(res[10].b_rawpath, b'path/\x80-invalid-unicode-path.txt')
# The "Moves"
# NOTE: The path prefixes a/ and b/ here are legit! We're actually