From 1faf84f8eb760b003ad2be81432443bf443b82e6 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 15:26:23 +0200 Subject: Fix bug in diff parser output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The diff --patch parser was missing some edge case where Git would encode non-ASCII chars in path names as octals, but these weren't decoded properly. \360\237\222\251.txt Decoded via utf-8, that will return: 💩.txt --- doc/source/changes.rst | 2 ++ git/diff.py | 17 +++++++++++++++-- git/test/fixtures/diff_patch_unsafe_paths | 7 +++++++ git/test/test_diff.py | 13 +++++++------ 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 928675d0..dd7a3815 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -5,6 +5,8 @@ Changelog 2.0.4 - Fixes ============= +* Fix: non-ASCII paths are now properly decoded and returned in + ``.diff()`` output * Fix: `RemoteProgress` will now strip the ', ' prefix or suffix from messages. * API: Remote.[fetch|push|pull](...) methods now allow the ``progress`` argument to be a callable. 
This saves you from creating a custom type with usually just one diff --git a/git/diff.py b/git/diff.py index 44a65017..9073767e 100644 --- a/git/diff.py +++ b/git/diff.py @@ -15,12 +15,23 @@ from git.compat import ( PY3 ) - __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE') # Special object to compare against the empty tree in diffs NULL_TREE = object() +_octal_byte_re = re.compile(b'\\\\([0-9]{3})') + + +def _octal_repl(matchobj): + value = matchobj.group(1) + value = int(value, 8) + if PY3: + value = bytes(bytearray((value,))) + else: + value = chr(value) + return value + def decode_path(path, has_ab_prefix=True): if path == b'/dev/null': @@ -32,6 +43,8 @@ def decode_path(path, has_ab_prefix=True): .replace(b'\\"', b'"') .replace(b'\\\\', b'\\')) + path = _octal_byte_re.sub(_octal_repl, path) + if has_ab_prefix: assert path.startswith(b'a/') or path.startswith(b'b/') path = path[2:] @@ -337,7 +350,7 @@ class Diff(object): :note: This property is deprecated, please use ``renamed_file`` instead. 
""" return self.renamed_file - + @property def renamed_file(self): """:returns: True if the blob of our diff has been renamed diff --git a/git/test/fixtures/diff_patch_unsafe_paths b/git/test/fixtures/diff_patch_unsafe_paths index 14375f79..9ee6b834 100644 --- a/git/test/fixtures/diff_patch_unsafe_paths +++ b/git/test/fixtures/diff_patch_unsafe_paths @@ -61,6 +61,13 @@ index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94 +++ "b/path/¯\\_(ツ)_|¯" @@ -0,0 +1 @@ +dummy content +diff --git "a/path/\360\237\222\251.txt" "b/path/\360\237\222\251.txt" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/\360\237\222\251.txt" +@@ -0,0 +1 @@ ++dummy content diff --git a/a/with spaces b/b/with some spaces similarity index 100% rename from a/with spaces diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 1d7a4fda..8966351a 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -161,16 +161,17 @@ class TestDiff(TestBase): self.assertEqual(res[6].b_path, u'path/with spaces') self.assertEqual(res[7].b_path, u'path/with-question-mark?') self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯') + self.assertEqual(res[9].b_path, u'path/💩.txt') # The "Moves" # NOTE: The path prefixes a/ and b/ here are legit! We're actually # verifying that it's not "a/a/" that shows up, see the fixture data. - self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit! - self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit! - self.assertEqual(res[10].a_path, u'a/ending in a space ') - self.assertEqual(res[10].b_path, u'b/ending with space ') - self.assertEqual(res[11].a_path, u'a/"with-quotes"') - self.assertEqual(res[11].b_path, u'b/"with even more quotes"') + self.assertEqual(res[10].a_path, u'a/with spaces') # NOTE: path a/ here legit! + self.assertEqual(res[10].b_path, u'b/with some spaces') # NOTE: path b/ here legit! 
+ self.assertEqual(res[11].a_path, u'a/ending in a space ') + self.assertEqual(res[11].b_path, u'b/ending with space ') + self.assertEqual(res[12].a_path, u'a/"with-quotes"') + self.assertEqual(res[12].b_path, u'b/"with even more quotes"') def test_diff_patch_format(self): # test all of the 'old' format diffs for completness - it should at least -- cgit v1.2.1 From 0235f910916b49a38aaf1fcbaa6cfbef32c567a6 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 15:59:46 +0200 Subject: Skip test that always fails on Travis CI --- git/test/test_docs.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/git/test/test_docs.py b/git/test/test_docs.py index 7b3b7474..27470748 100644 --- a/git/test/test_docs.py +++ b/git/test/test_docs.py @@ -7,11 +7,12 @@ import os from git.test.lib import TestBase -from gitdb.test.lib import with_rw_directory +from gitdb.test.lib import skip_on_travis_ci, with_rw_directory class Tutorials(TestBase): + @skip_on_travis_ci @with_rw_directory def test_init_repo_object(self, rw_dir): # [1-test_init_repo_object] @@ -165,7 +166,7 @@ class Tutorials(TestBase): for sm in cloned_repo.submodules: assert not sm.remove().exists() # after removal, the sm doesn't exist anymore sm = cloned_repo.create_submodule('mysubrepo', 'path/to/subrepo', url=bare_repo.git_dir, branch='master') - + # .gitmodules was written and added to the index, which is now being committed cloned_repo.index.commit("Added submodule") assert sm.exists() and sm.module_exists() # this submodule is defintely available @@ -395,7 +396,7 @@ class Tutorials(TestBase): hcommit.diff() # diff tree against index hcommit.diff('HEAD~1') # diff tree against previous tree hcommit.diff(None) # diff tree against working tree - + index = repo.index index.diff() # diff index against itself yielding empty diff index.diff(None) # diff index against working copy @@ -446,7 +447,7 @@ class Tutorials(TestBase): sm = sms[0] assert sm.name == 'gitdb' # git-python has 
gitdb as single submodule ... assert sm.children()[0].name == 'smmap' # ... which has smmap as single submodule - + # The module is the repository referenced by the submodule assert sm.module_exists() # the module is available, which doesn't have to be the case. assert sm.module().working_tree_dir.endswith('gitdb') @@ -458,7 +459,7 @@ class Tutorials(TestBase): assert sm.config_reader().get_value('path') == sm.path assert len(sm.children()) == 1 # query the submodule hierarchy # ![1-test_submodules] - + @with_rw_directory def test_add_file_and_commit(self, rw_dir): import git -- cgit v1.2.1 From 0eafe201905d85be767c24106eb1ab12efd3ee22 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 16:20:22 +0200 Subject: Add test case as example of Git commit with invalid data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a real commit from the microjs.com open source project, see https://github.com/madrobby/microjs.com/commit/7e8457c17850d0991763941213dcb403d80f39f8, which is declared to be encoded in UTF-8, but contains invalid bytes. This makes GitPython choke on it while decoding. Rather than choking, this should instead accept the error and replace the invalid bytes with the Unicode replacement character � (U+FFFD). 
--- git/test/fixtures/commit_invalid_data | 6 ++++++ git/test/test_commit.py | 7 +++++++ 2 files changed, 13 insertions(+) create mode 100644 git/test/fixtures/commit_invalid_data diff --git a/git/test/fixtures/commit_invalid_data b/git/test/fixtures/commit_invalid_data new file mode 100644 index 00000000..d112bf2d --- /dev/null +++ b/git/test/fixtures/commit_invalid_data @@ -0,0 +1,6 @@ +tree 9f1a495d7d9692d24f5caedaa89f5c2c32d59368 +parent 492ace2ffce0e426ebeb55e364e987bcf024dd3b +author E.Azer KoÃoÃoÃoculu 1306710073 +0300 +committer E.Azer KoÃoÃoÃoculu 1306710073 +0300 + +add environjs diff --git a/git/test/test_commit.py b/git/test/test_commit.py index 23b7154a..ea8cd9af 100644 --- a/git/test/test_commit.py +++ b/git/test/test_commit.py @@ -306,6 +306,13 @@ class TestCommit(TestBase): # it appears cmt.author.__repr__() + def test_invalid_commit(self): + cmt = self.rorepo.commit() + cmt._deserialize(open(fixture_path('commit_invalid_data'), 'rb')) + + assert cmt.author.name == u'E.Azer Ko�o�o�oculu', cmt.author.name + assert cmt.author.email == 'azer@kodfabrik.com', cmt.author.email + def test_gpgsig(self): cmt = self.rorepo.commit() cmt._deserialize(open(fixture_path('commit_with_gpgsig'), 'rb')) -- cgit v1.2.1 From 79c99c0f66c8f3c8d13258376c82125a23b1b5c8 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 16:26:43 +0200 Subject: Ignore invalid data when decoding commit objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, GitPython choked on commit objects containing invalid bytes while decoding. Rather than choking, it now accepts the error and replaces the invalid bytes with the Unicode replacement character � (U+FFFD). 
--- git/objects/commit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git/objects/commit.py b/git/objects/commit.py index dc722f97..58a8912f 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -501,14 +501,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): try: self.author, self.authored_date, self.author_tz_offset = \ - parse_actor_and_date(author_line.decode(self.encoding)) + parse_actor_and_date(author_line.decode(self.encoding, errors='replace')) except UnicodeDecodeError: log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding, exc_info=True) try: self.committer, self.committed_date, self.committer_tz_offset = \ - parse_actor_and_date(committer_line.decode(self.encoding)) + parse_actor_and_date(committer_line.decode(self.encoding, errors='replace')) except UnicodeDecodeError: log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, exc_info=True) @@ -518,7 +518,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # The end of our message stream is marked with a newline that we strip self.message = stream.read() try: - self.message = self.message.decode(self.encoding) + self.message = self.message.decode(self.encoding, errors='replace') except UnicodeDecodeError: log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True) # END exception handling -- cgit v1.2.1 From c4ace5482efa4ca8769895dc9506d8eccfb0173d Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 19:17:05 +0200 Subject: Update changelog --- doc/source/changes.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/changes.rst b/doc/source/changes.rst index dd7a3815..273b9ad0 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -5,6 +5,9 @@ Changelog 2.0.4 - Fixes ============= +* Fix: parser of commit object data is now robust against 
cases where + commit object contains invalid bytes. The invalid characters are now + replaced rather than choked on. * Fix: non-ASCII paths are now properly decoded and returned in ``.diff()`` output * Fix: `RemoteProgress` will now strip the ', ' prefix or suffix from messages. -- cgit v1.2.1 From 2f91ab7bb0dadfd165031f846ae92c9466dceb66 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 19:21:35 +0200 Subject: This is 2.0.4 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 50ffc5aa..2165f8f9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.3 +2.0.4 -- cgit v1.2.1 From 25844b80c56890abc79423a7a727a129b2b9db85 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 21:20:47 +0200 Subject: Fix regex This catches the case where the matched line contains "(" or ")" characters. --- doc/source/changes.rst | 5 +++++ git/remote.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 273b9ad0..6a8e87d0 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,11 @@ Changelog ========= +2.0.5 - Fixes +============= + +* Fix: parser of fetch info lines choked on some legitimate lines + 2.0.4 - Fixes ============= diff --git a/git/remote.py b/git/remote.py index 30e32ae3..42753977 100644 --- a/git/remote.py +++ b/git/remote.py @@ -204,7 +204,7 @@ class FetchInfo(object): NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ FAST_FORWARD, ERROR = [1 << x for x in range(8)] - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.$@]+\]?)\s+(.+) -> ([/\w_\+\.\-$@#]+)( \(.*\)?$)?") + re_fetch_result = re.compile("^\s*(.) 
(\[?[\w\s\.$@]+\]?)\s+(.+) -> ([/\w_\+\.\-$@#()]+)( \(.*\)?$)?") _flag_map = {'!': ERROR, '+': FORCED_UPDATE, -- cgit v1.2.1 From 88716d3be8d9393fcf5695dd23efb9c252d1b09e Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 21:23:51 +0200 Subject: This is 2.0.5 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 2165f8f9..e0102586 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.4 +2.0.5 -- cgit v1.2.1 From 6581acaa7081d29dbf9f35c5ce78db78cf822ab8 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 21:29:40 +0200 Subject: Bump for new version --- VERSION | 2 +- doc/source/changes.rst | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e0102586..a47ed0c4 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.5 +2.0.6dev0 diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 6a8e87d0..4623fdc4 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,11 @@ Changelog ========= +2.0.6 - Fixes +============= + +* ... + 2.0.5 - Fixes ============= -- cgit v1.2.1 From 543d900e68883740acf3b07026b262176191ab60 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 1 Jun 2016 09:06:22 +0200 Subject: chore(compat): state py2.6 support officially More information in the respective issue. Fixes #453 --- README.md | 2 ++ setup.py | 1 + 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 220e8f35..b3c5c947 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,13 @@ It provides abstractions of git objects for easy access of repository data, and The object database implementation is optimized for handling large quantities of objects and large datasets, which is achieved by using low-level structures and data streaming. + ### REQUIREMENTS GitPython needs the `git` executable to be installed on the system and available in your `PATH` for most operations. 
If it is not in your `PATH`, you can help GitPython find it by setting the `GIT_PYTHON_GIT_EXECUTABLE=` environment variable. * Git (1.7.x or newer) +* Python 2.7 to 3.5, while python 2.6 is supported on a *best-effort basis*. The list of dependencies are listed in `./requirements.txt` and `./test-requirements.txt`. The installer takes care of installing them for you. diff --git a/setup.py b/setup.py index 2df910e0..05c12b8f 100755 --- a/setup.py +++ b/setup.py @@ -110,6 +110,7 @@ GitPython is a python library used to interact with Git repositories""", "Operating System :: MacOS :: MacOS X", "Programming Language :: Python", "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", -- cgit v1.2.1 From 2219f13eb6e18bdd498b709e074ff9c7e8cb3511 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 1 Jun 2016 09:12:04 +0200 Subject: fix(test): do not skip test on travis Please exclude the particular assertion instead. 
Related to https://github.com/gitpython-developers/GitPython/commit/a3f24f64a20d1e09917288f67fd21969f4444acd#commitcomment-17691581 --- git/ext/gitdb | 2 +- git/test/test_docs.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/git/ext/gitdb b/git/ext/gitdb index d1996e04..2389b752 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit d1996e04dbf4841b853b60c1365f0f5fd28d170c +Subproject commit 2389b75280efb1a63e6ea578eae7f897fd4beb1b diff --git a/git/test/test_docs.py b/git/test/test_docs.py index 27470748..bc961230 100644 --- a/git/test/test_docs.py +++ b/git/test/test_docs.py @@ -11,8 +11,6 @@ from gitdb.test.lib import skip_on_travis_ci, with_rw_directory class Tutorials(TestBase): - - @skip_on_travis_ci @with_rw_directory def test_init_repo_object(self, rw_dir): # [1-test_init_repo_object] -- cgit v1.2.1 From 55969cb6034d5b416946cdb8aaf7223b1c3cbea6 Mon Sep 17 00:00:00 2001 From: Andreas Maier Date: Wed, 1 Jun 2016 10:02:44 +0200 Subject: Fixed 'TypeError: decode() takes no keyword arguments' on Python 2.6 --- doc/source/changes.rst | 3 ++- git/objects/commit.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 4623fdc4..9bf09065 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -5,7 +5,8 @@ Changelog 2.0.6 - Fixes ============= -* ... +* Fix: TypeError about passing keyword argument to string decode() on + Python 2.6. 
2.0.5 - Fixes ============= diff --git a/git/objects/commit.py b/git/objects/commit.py index 58a8912f..9e434c92 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -501,14 +501,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): try: self.author, self.authored_date, self.author_tz_offset = \ - parse_actor_and_date(author_line.decode(self.encoding, errors='replace')) + parse_actor_and_date(author_line.decode(self.encoding, 'replace')) except UnicodeDecodeError: log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding, exc_info=True) try: self.committer, self.committed_date, self.committer_tz_offset = \ - parse_actor_and_date(committer_line.decode(self.encoding, errors='replace')) + parse_actor_and_date(committer_line.decode(self.encoding, 'replace')) except UnicodeDecodeError: log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, exc_info=True) @@ -518,7 +518,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # The end of our message stream is marked with a newline that we strip self.message = stream.read() try: - self.message = self.message.decode(self.encoding, errors='replace') + self.message = self.message.decode(self.encoding, 'replace') except UnicodeDecodeError: log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True) # END exception handling -- cgit v1.2.1 From 85e78ca3d9decf8807508b41dbe5335ffb6050a7 Mon Sep 17 00:00:00 2001 From: David Danier Date: Wed, 1 Jun 2016 18:01:34 +0200 Subject: Make sure os is not even partly destroyed --- git/cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/cmd.py b/git/cmd.py index c29e3485..a8afc144 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -287,7 +287,7 @@ class Git(LazyMixin): return # can be that nothing really exists anymore ... 
- if os is None: + if os is None or os.kill is None: return # try to kill it -- cgit v1.2.1 From fde89f2a65c2503e5aaf44628e05079504e559a0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 2 Jun 2016 06:42:45 +0200 Subject: fix(test): remove unused import --- git/ext/gitdb | 2 +- git/test/test_docs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/git/ext/gitdb b/git/ext/gitdb index 2389b752..d1996e04 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit 2389b75280efb1a63e6ea578eae7f897fd4beb1b +Subproject commit d1996e04dbf4841b853b60c1365f0f5fd28d170c diff --git a/git/test/test_docs.py b/git/test/test_docs.py index bc961230..8dc08559 100644 --- a/git/test/test_docs.py +++ b/git/test/test_docs.py @@ -7,7 +7,7 @@ import os from git.test.lib import TestBase -from gitdb.test.lib import skip_on_travis_ci, with_rw_directory +from gitdb.test.lib import with_rw_directory class Tutorials(TestBase): -- cgit v1.2.1