From 394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 13 Aug 2010 14:04:11 +0200 Subject: unicode handling in messages and trees was improved. Messages are now written according to the encoding of the commit object, and decoded using that information as well. Trees will encode and decode their names with utf8 --- lib/git/ext/gitdb | 2 +- lib/git/objects/commit.py | 14 +++++++++++++- lib/git/objects/fun.py | 7 +++++++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'lib/git') diff --git a/lib/git/ext/gitdb b/lib/git/ext/gitdb index 18152feb..425ecf04 160000 --- a/lib/git/ext/gitdb +++ b/lib/git/ext/gitdb @@ -1 +1 @@ -Subproject commit 18152febd428e67b86bb4fb68ec1691d4de75a9c +Subproject commit 425ecf04aa5038c3d46b01ca20de17c51ef6c4e5 diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 132d794b..f3a6e216 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -31,6 +31,7 @@ from time import ( altzone ) import os +import sys __all__ = ('Commit', ) @@ -381,7 +382,13 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): write("encoding %s\n" % self.encoding) write("\n") - write(self.message) + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding return self def _deserialize(self, stream): @@ -421,6 +428,11 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # a stream from our data simply gives us the plain message # The end of our message stream is marked with a newline that we strip self.message = stream.read() + try: + self.message = self.message.decode(self.encoding) + except Exception: + print >> sys.stderr, "Failed to decode message: %s" % self.message + # END exception handling return self #} END serializable implementation diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py index e73e93b0..9b0a377c 100644 --- a/lib/git/objects/fun.py +++ b/lib/git/objects/fun.py @@ -66,7 +66,14 @@ def tree_entries_from_data(data): while data[i] != '\0': i += 1 # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # END handle encoding # byte is NULL, get next 20 i += 1 -- cgit v1.2.1