diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-05 16:44:54 +0100 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-05 16:44:54 +0100 |
commit | 8a308613467a1510f8dac514624abae4e10c0779 (patch) | |
tree | 6c703cb00875ca77972b6250db8b63800a051502 | |
parent | 3d0556a31916a709e9da3eafb92fc6b8bf69896c (diff) | |
download | gitpython-8a308613467a1510f8dac514624abae4e10c0779.tar.gz |
Fixes test_blob and improved commit writing/reading
-rw-r--r-- | git/compat.py | 4 | ||||
-rw-r--r-- | git/objects/commit.py | 44 | ||||
-rw-r--r-- | git/objects/fun.py | 26 |
3 files changed, 40 insertions, 34 deletions
diff --git a/git/compat.py b/git/compat.py index 4a892ad2..f11d1423 100644 --- a/git/compat.py +++ b/git/compat.py @@ -27,12 +27,15 @@ defenc = sys.getdefaultencoding() if PY3: import io FileType = io.IOBase + def byte_ord(b): + return b else: FileType = file # usually, this is just ascii, which might not enough for our encoding needs # Unless it's set specifically, we override it to be utf-8 if defenc == 'ascii': defenc = 'utf-8' + byte_ord = ord def with_metaclass(meta, *bases): @@ -54,4 +57,3 @@ def with_metaclass(meta, *bases): # end metaclass return metaclass(meta.__name__ + 'Helper', None, {}) # end handle py2 - diff --git a/git/objects/commit.py b/git/objects/commit.py index 5ad7902b..f8b5c969 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -419,23 +419,25 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): next_line = None while True: parent_line = readline() - if not parent_line.startswith('parent'): + if not parent_line.startswith(b'parent'): next_line = parent_line break # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) + self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii')))) # END for each parent line self.parents = tuple(self.parents) - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + # we don't know actual author encoding before we have parsed it, so keep the lines around + author_line = next_line + committer_line = readline() # we might run into one or more mergetag blocks, skip those for now next_line = readline() - while next_line.startswith('mergetag '): + while next_line.startswith(b'mergetag '): next_line = readline() while next_line.startswith(' '): next_line = readline() + # end skip mergetags # now we can have the encoding line, or an empty line followed by the optional # message. @@ -444,39 +446,40 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # read headers enc = next_line buf = enc.strip() - while buf != "": - if buf[0:10] == "encoding ": - self.encoding = buf[buf.find(' ') + 1:] - elif buf[0:7] == "gpgsig ": - sig = buf[buf.find(' ') + 1:] + "\n" + while buf: + if buf[0:10] == b"encoding ": + self.encoding = buf[buf.find(' ') + 1:].decode('ascii') + elif buf[0:7] == b"gpgsig ": + sig = buf[buf.find(b' ') + 1:] + b"\n" is_next_header = False while True: sigbuf = readline() - if sigbuf == "": + if not sigbuf: break - if sigbuf[0:1] != " ": + if sigbuf[0:1] != b" ": buf = sigbuf.strip() is_next_header = True break sig += sigbuf[1:] - self.gpgsig = sig.rstrip("\n") + # end read all signature + self.gpgsig = sig.rstrip(b"\n").decode('ascii') if is_next_header: continue buf = readline().strip() - # decode the authors name + try: - self.author.name = self.author.name.decode(self.encoding) + self.author, self.authored_date, self.author_tz_offset = \ + parse_actor_and_date(author_line.decode(self.encoding)) except UnicodeDecodeError: - log.error("Failed to decode author name '%s' using encoding %s", self.author.name, self.encoding, + log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding, exc_info=True) - # END handle author's encoding - # decode committer name try: - self.committer.name = self.committer.name.decode(self.encoding) + self.committer, self.committed_date, self.committer_tz_offset = \ + parse_actor_and_date(committer_line.decode(self.encoding)) except UnicodeDecodeError: - log.error("Failed to decode committer name '%s' using encoding %s", self.committer.name, self.encoding, + log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, exc_info=True) # END handle author's encoding @@ -488,6 +491,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): except UnicodeDecodeError: log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True) # END exception handling + return self #} END serializable implementation diff --git a/git/objects/fun.py b/git/objects/fun.py index db2ec7c2..f92a4c06 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,6 +1,9 @@ """Module with functions which are supposed to be as fast as possible""" from stat import S_ISDIR from git.compat import ( + byte_ord, + force_bytes, + defenc, xrange, text_type ) @@ -17,13 +20,13 @@ def tree_to_stream(entries, write): bit_mask = 7 # 3 bits set for binsha, mode, name in entries: - mode_str = '' + mode_str = b'' for i in xrange(6): mode_str = chr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero - if mode_str[0] == '0': + if byte_ord(mode_str[0]) == ord_zero: mode_str = mode_str[1:] # END save a byte @@ -33,16 +36,16 @@ def tree_to_stream(entries, write): # According to my tests, this is exactly what git does, that is it just # takes the input literally, which appears to be utf8 on linux. if isinstance(name, text_type): - name = name.encode("utf8") - write("%s %s\0%s" % (mode_str, name, binsha)) + name = name.encode(defenc) + write(b''.join(mode_str, b' ', name, b'\0', binsha)) # END for each item - def tree_entries_from_data(data): """Reads the binary representation of a tree and returns tuples of Tree items - :param data: data block with tree data + :param data: data block with tree data (as bytes) :return: list(tuple(binsha, mode, tree_relative_path), ...)""" ord_zero = ord('0') + space_ord = ord(' ') len_data = len(data) i = 0 out = list() @@ -52,10 +55,10 @@ def tree_entries_from_data(data): # read mode # Some git versions truncate the leading 0, some don't # The type will be extracted from the mode later - while data[i] != ' ': + while byte_ord(data[i]) != space_ord: # move existing mode integer up one level being 3 bits # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) + mode = (mode << 3) + (byte_ord(data[i]) - ord_zero) i += 1 # END while reading mode @@ -65,7 +68,7 @@ def tree_entries_from_data(data): # parse name, it is NULL separated ns = i - while data[i] != '\0': + while byte_ord(data[i]) != 0: i += 1 # END while not reached NULL @@ -73,12 +76,9 @@ def tree_entries_from_data(data): # Only use the respective unicode object if the byte stream was encoded name = data[ns:i] try: - name_enc = name.decode("utf-8") + name = name.decode(defenc) except UnicodeDecodeError: pass - else: - if len(name) > len(name_enc): - name = name_enc # END handle encoding # byte is NULL, get next 20 |