From 8c1a87d11df666d308d14e4ae7ee0e9d614296b6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 12:30:33 +0200 Subject: commit: refactored existing code to decode commits from streams - performance is slightly better git.cmd: added method to provide access to the content stream directly. This is more efficient if large objects are handled, if it is actually used test.helpers: removed unnecessary code --- lib/git/cmd.py | 901 +++++++++++++++++++++++++--------------------- lib/git/objects/base.py | 418 ++++++++++----------- lib/git/objects/commit.py | 139 +++---- lib/git/objects/tree.py | 2 +- lib/git/objects/utils.py | 17 + test/git/test_commit.py | 2 +- test/git/test_diff.py | 6 +- test/git/test_repo.py | 5 +- test/testlib/helper.py | 38 +- 9 files changed, 810 insertions(+), 718 deletions(-) diff --git a/lib/git/cmd.py b/lib/git/cmd.py index ef2fdf4e..cef4ea60 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,414 +13,505 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', - 'output_stream' ) + 'with_exceptions', 'as_process', + 'output_stream' ) def dashify(string): - return string.replace('_', '-') + return string.replace('_', '-') class Git(object): - """ - The Git class manages communication with the Git binary. - - It provides a convenient interface to calling the Git binary, such as in:: - - g = Git( git_dir ) - g.init() # calls 'git init' program - rval = g.ls_files() # calls 'git ls-files' program - - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") - - class AutoInterrupt(object): - """ - Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. - - The wait method was overridden to perform automatic status code checking - and possibly raise. - """ - __slots__= ("proc", "args") - - def __init__(self, proc, args ): - self.proc = proc - self.args = args - - def __del__(self): - # did the process finish already so we have a return code ? - if self.proc.poll() is not None: - return - - # can be that nothing really exists anymore ... - if os is None: - return - - # try to kill it - try: - os.kill(self.proc.pid, 2) # interrupt signal - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) - # END exception handling - - def __getattr__(self, attr): - return getattr(self.proc, attr) - - def wait(self): - """ - Wait for the process and return its status code. - - Raise - GitCommandError if the return status is not 0 - """ - status = self.proc.wait() - if status != 0: - raise GitCommandError(self.args, status, self.proc.stderr.read()) - # END status handling - return status - - - - def __init__(self, working_dir=None): - """ - Initialize this instance with: - - ``working_dir`` - Git directory we should work in. 
If None, we always work in the current - directory as returned by os.getcwd(). - It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories. - """ - super(Git, self).__init__() - self._working_dir = working_dir - - # cached command slots - self.cat_file_header = None - self.cat_file_all = None - - def __getattr__(self, name): - """ - A convenience method as it allows to call the command as if it was - an object. - Returns - Callable object that will execute call _call_process with your arguments. - """ - if name[:1] == '_': - raise AttributeError(name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - - @property - def working_dir(self): - """ - Returns - Git directory we are working on - """ - return self._working_dir - - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - as_process=False, - output_stream=None, - **subprocess_kwargs - ): - """ - Handles executing the command on the shell and consumes and returns - the returned information (stdout) - - ``command`` - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. - - ``istream`` - Standard input filehandle passed to subprocess.Popen. - - ``with_keep_cwd`` - Whether to use the current working directory from os.getcwd(). - The cmd otherwise uses its own working_dir that it has been initialized - with if possible. - - ``with_extended_output`` - Whether to return a (status, stdout, stderr) tuple. - - ``with_exceptions`` - Whether to raise an exception when git returns a non-zero status. - - ``as_process`` - Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. - - ``output_stream`` - If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe due to issues with subprocess. - This merely is a workaround as data will be copied from the - output pipe to the given output stream directly. - - ``**subprocess_kwargs`` - Keyword arguments to be passed to subprocess.Popen. Please note that - some of the valid kwargs are already set by this method, the ones you - specify may not be the same ones. - - Returns:: - - str(output) # extended_output = False (Default) - tuple(int(status), str(stdout), str(stderr)) # extended_output = True - - if ouput_stream is True, the stdout value will be your output stream: - output_stream # extended_output = False - tuple(int(status), output_stream, str(stderr))# extended_output = True - - Raise - GitCommandError - - NOTE - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module. - """ - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': - print ' '.join(command) - - # Allow the user to have the command executed in their working dir. 
- if with_keep_cwd or self._working_dir is None: - cwd = os.getcwd() - else: - cwd=self._working_dir - - # Start the process - proc = subprocess.Popen(command, - cwd=cwd, - stdin=istream, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - close_fds=(os.name=='posix'),# unsupported on linux - **subprocess_kwargs - ) - if as_process: - return self.AutoInterrupt(proc, command) - - # Wait for the process to return - status = 0 - stdout_value = '' - stderr_value = '' - try: - if output_stream is None: - stdout_value = proc.stdout.read().rstrip() # strip trailing "\n" - else: - max_chunk_size = 1024*64 - while True: - chunk = proc.stdout.read(max_chunk_size) - output_stream.write(chunk) - if len(chunk) < max_chunk_size: - break - # END reading output stream - stdout_value = output_stream - # END stdout handling - stderr_value = proc.stderr.read().rstrip() # strip trailing "\n" - - # waiting here should do nothing as we have finished stream reading - status = proc.wait() - finally: - proc.stdout.close() - proc.stderr.close() - - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) - - if GIT_PYTHON_TRACE == 'full': - if stderr_value: - print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d: '%s'" % (command, status, stdout_value) - else: - print "%s -> %d" % (command, status) - - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value - - def transform_kwargs(self, **kwargs): - """ - Transforms Python style kwargs into git command line options. - """ - args = [] - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args - - @classmethod - def __unpack_args(cls, arg_list): - if not isinstance(arg_list, (list,tuple)): - return [ str(arg_list) ] - - outlist = list() - for arg in arg_list: - if isinstance(arg_list, (list, tuple)): - outlist.extend(cls.__unpack_args( arg )) - # END recursion - else: - outlist.append(str(arg)) - # END for each arg - return outlist - - def _call_process(self, method, *args, **kwargs): - """ - Run the given git command with the specified arguments and return - the result as a String - - ``method`` - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. - - ``args`` - is the list of arguments. If None is included, it will be pruned. - This allows your commands to call git more conveniently as None - is realized as non-existent - - ``kwargs`` - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). - - Examples:: - git.rev_list('master', max_count=10, header=True) - - Returns - Same as execute() - """ - - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. 
- _kwargs = {} - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass - - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - - ext_args = self.__unpack_args([a for a in args if a is not None]) - args = opt_args + ext_args - - call = ["git", dashify(method)] - call.extend(args) - - return self.execute(call, **_kwargs) - - def _parse_object_header(self, header_line): - """ - ``header_line`` - type_string size_as_int - - Returns - (hex_sha, type_string, size_as_int) - - Raises - ValueError if the header contains indication for an error due to incorrect - input sha - """ - tokens = header_line.split() - if len(tokens) != 3: - raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) - if len(tokens[0]) != 40: - raise ValueError("Failed to parse header: %r" % header_line) - return (tokens[0], tokens[1], int(tokens[2])) - - def __prepare_ref(self, ref): - # required for command to separate refs on stdin - refstr = str(ref) # could be ref-object - if refstr.endswith("\n"): - return refstr - return refstr + "\n" - - def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): - cur_val = getattr(self, attr_name) - if cur_val is not None: - return cur_val - - options = { "istream" : subprocess.PIPE, "as_process" : True } - options.update( kwargs ) - - cmd = self._call_process( cmd_name, *args, **options ) - setattr(self, attr_name, cmd ) - return cmd - - def __get_object_header(self, cmd, ref): - cmd.stdin.write(self.__prepare_ref(ref)) - cmd.stdin.flush() - return self._parse_object_header(cmd.stdout.readline()) - - def get_object_header(self, ref): - """ - Use this method to quickly examine the type and size of the object behind - the given ref. - - NOTE - The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. - - Return: - (hexsha, type_string, size_as_int) - """ - cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) - return self.__get_object_header(cmd, ref) - - def get_object_data(self, ref): - """ - As get_object_header, but returns object data as well - - Return: - (hexsha, type_string, size_as_int,data_string) - """ - cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - hexsha, typename, size = self.__get_object_header(cmd, ref) - data = cmd.stdout.read(size) - cmd.stdout.read(1) # finishing newlines - - return (hexsha, typename, size, data) - - def clear_cache(self): - """ - Clear all kinds of internal caches to release resources. - - Currently persistent commands will be interrupted. - - Returns - self - """ - self.cat_file_all = None - self.cat_file_header = None - return self + """ + The Git class manages communication with the Git binary. + + It provides a convenient interface to calling the Git binary, such as in:: + + g = Git( git_dir ) + g.init() # calls 'git init' program + rval = g.ls_files() # calls 'git ls-files' program + + ``Debugging`` + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") + + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object. 
+ + The wait method was overridden to perform automatic status code checking + and possibly raise. + """ + __slots__= ("proc", "args") + + def __init__(self, proc, args ): + self.proc = proc + self.args = args + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # can be that nothing really exists anymore ... + if os is None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + # for some reason, providing None for stdout/stderr still prints something. This is why + # we simply use the shell and redirect to nul. Its slower than CreateProcess, question + # is whether we really want to see all these messages. Its annoying no matter what. + subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def wait(self): + """ + Wait for the process and return its status code. + + Raise + GitCommandError if the return status is not 0 + """ + status = self.proc.wait() + if status != 0: + raise GitCommandError(self.args, status, self.proc.stderr.read()) + # END status handling + return status + # END auto interrupt + + class CatFileContentStream(object): + """Object representing a sized read-only stream returning the contents of + an object. + It behaves like a stream, but counts the data read and simulates an empty + stream once our sized content region is empty. + If not all data is read to the end of the objects's lifetime, we read the + rest to assure the underlying stream continues to work""" + + __slots__ = ('_stream', '_nbr', '_size') + + def __init__(self, size, stream): + self._stream = stream + self._size = size + self._nbr = 0 # num bytes read + + def read(self, size=-1): + bytes_left = self._size - self._nbr + if bytes_left == 0: + return '' + if size > -1: + # assure we don't try to read past our limit + size = min(bytes_left, size) + else: + # they try to read all, make sure its not more than what remains + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # check for depletion, read our final byte to make the stream usable by others + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + + return data + + def readline(self, size=-1): + if self._nbr == self._size: + return '' + + if size > -1: + size = min(self._size - self._nbr, size) + + data = self._stream.readline(size) + self._nbr += len(data) + + # handle final byte + # we inline everything, it must be fast ! 
+ if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size=-1): + if self._nbr == self._size: + return list() + + # leave all additional logic to our readline method, we just check the size + out = list() + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def __del__(self): + bytes_left = self._size - self._nbr + if bytes_left: + # seek and discard + self._stream.seek(bytes_left + 1, os.SEEK_CUR) # includes terminating newline + # END handle incomplete read + + + def __init__(self, working_dir=None): + """ + Initialize this instance with: + + ``working_dir`` + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd(). + It is meant to be the working tree directory if available, or the + .git directory in case of bare repositories. + """ + super(Git, self).__init__() + self._working_dir = working_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None + + def __getattr__(self, name): + """ + A convenience method as it allows to call the command as if it was + an object. + Returns + Callable object that will execute call _call_process with your arguments. + """ + if name[:1] == '_': + raise AttributeError(name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + + @property + def working_dir(self): + """ + Returns + Git directory we are working on + """ + return self._working_dir + + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + as_process=False, + output_stream=None, + **subprocess_kwargs + ): + """ + Handles executing the command on the shell and consumes and returns + the returned information (stdout) + + ``command`` + The command argument list to execute. + It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. + + ``istream`` + Standard input filehandle passed to subprocess.Popen. + + ``with_keep_cwd`` + Whether to use the current working directory from os.getcwd(). + The cmd otherwise uses its own working_dir that it has been initialized + with if possible. + + ``with_extended_output`` + Whether to return a (status, stdout, stderr) tuple. + + ``with_exceptions`` + Whether to raise an exception when git returns a non-zero status. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output and + with_exceptions ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + + ``output_stream`` + If set to a file-like object, data produced by the git command will be + output to the given stream directly. + This feature only has any effect if as_process is False. Processes will + always be created with a pipe due to issues with subprocess. 
+ This merely is a workaround as data will be copied from the + output pipe to the given output stream directly. + + ``**subprocess_kwargs`` + Keyword arguments to be passed to subprocess.Popen. Please note that + some of the valid kwargs are already set by this method, the ones you + specify may not be the same ones. + + Returns:: + + str(output) # extended_output = False (Default) + tuple(int(status), str(stdout), str(stderr)) # extended_output = True + + if ouput_stream is True, the stdout value will be your output stream: + output_stream # extended_output = False + tuple(int(status), output_stream, str(stderr))# extended_output = True + + Raise + GitCommandError + + NOTE + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module. + """ + if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': + print ' '.join(command) + + # Allow the user to have the command executed in their working dir. + if with_keep_cwd or self._working_dir is None: + cwd = os.getcwd() + else: + cwd=self._working_dir + + # Start the process + proc = subprocess.Popen(command, + cwd=cwd, + stdin=istream, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + close_fds=(os.name=='posix'),# unsupported on linux + **subprocess_kwargs + ) + if as_process: + return self.AutoInterrupt(proc, command) + + # Wait for the process to return + status = 0 + stdout_value = '' + stderr_value = '' + try: + if output_stream is None: + stdout_value = proc.stdout.read().rstrip() # strip trailing "\n" + else: + max_chunk_size = 1024*64 + while True: + chunk = proc.stdout.read(max_chunk_size) + output_stream.write(chunk) + if len(chunk) < max_chunk_size: + break + # END reading output stream + stdout_value = output_stream + # END stdout handling + stderr_value = proc.stderr.read().rstrip() # strip trailing "\n" + + # waiting here should do nothing as we have finished stream reading + status = proc.wait() + finally: + proc.stdout.close() + proc.stderr.close() + + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) + + if GIT_PYTHON_TRACE == 'full': + if stderr_value: + print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d: '%s'" % (command, status, stdout_value) + else: + print "%s -> %d" % (command, status) + + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value + + def transform_kwargs(self, **kwargs): + """ + Transforms Python style kwargs into git command line options. + """ + args = [] + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args + + @classmethod + def __unpack_args(cls, arg_list): + if not isinstance(arg_list, (list,tuple)): + return [ str(arg_list) ] + + outlist = list() + for arg in arg_list: + if isinstance(arg_list, (list, tuple)): + outlist.extend(cls.__unpack_args( arg )) + # END recursion + else: + outlist.append(str(arg)) + # END for each arg + return outlist + + def _call_process(self, method, *args, **kwargs): + """ + Run the given git command with the specified arguments and return + the result as a String + + ``method`` + is the command. 
Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. + + ``args`` + is the list of arguments. If None is included, it will be pruned. + This allows your commands to call git more conveniently as None + is realized as non-existent + + ``kwargs`` + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). + + Examples:: + git.rev_list('master', max_count=10, header=True) + + Returns + Same as execute() + """ + + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. + _kwargs = {} + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass + + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + + ext_args = self.__unpack_args([a for a in args if a is not None]) + args = opt_args + ext_args + + call = ["git", dashify(method)] + call.extend(args) + + return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + type_string size_as_int + + Returns + (hex_sha, type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) + if len(tokens[0]) != 40: + raise ValueError("Failed to parse header: %r" % header_line) + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ Use this method to quickly examine the type and size of the object behind + the given ref. + + :note: The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. 
+ + :return: (hexsha, type_string, size_as_int) """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ As get_object_header, but returns object data as well + :return: (hexsha, type_string, size_as_int,data_string) + :note: not threadsafe + """ + hexsha, typename, size, stream = self.stream_object_data(ref) + data = stream.read(size) + del(stream) + return (hexsha, typename, size, data) + + def stream_object_data(self, ref): + """As get_object_header, but returns the data as a stream + :return: (hexsha, type_string, size_as_int, stream) + :note: This method is not threadsafe, you need one independent Command instance + per thread to be safe !""" + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) + + def clear_cache(self): + """ + Clear all kinds of internal caches to release resources. + + Currently persistent commands will be interrupted. + + Returns + self + """ + self.cat_file_all = None + self.cat_file_header = None + return self diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index bb15192d..f7043199 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -6,223 +6,223 @@ import os from git.utils import LazyMixin, join_path_native import utils - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ - NULL_HEX_SHA = '0'*40 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.sha = id + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object + """ + NULL_HEX_SHA = '0'*40 + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "sha", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.sha = id - @classmethod - def new(cls, repo, id): - """ - Return - New Object instance of a type appropriate to the object type behind - id. 
The id of the newly created object will be a hexsha even though - the input id may have been a Reference or Rev-Spec - - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. - """ - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) - inst.size = size - return inst - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. - - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '' % (self.__class__.__name__, self.sha) + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. 
+ + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + elif attr == "data": + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.sha == other.sha + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.sha != other.sha + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.sha) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.sha + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '' % (self.__class__.__name__, self.sha) - @property - def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") - - def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) - return self + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. + + Returns + self + """ + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) + return self + class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, sha, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in - ``sha`` : string - is the git object id as hex sha + ``sha`` : string + is the git object id as hex sha - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. 
- file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - def __hash__(self): - """ - Returns - Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - @property - def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) - - @property - def abspath(self): - """ - Returns - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. - """ - return join_path_native(self.repo.working_tree_dir, self.path) - + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, sha) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def __hash__(self): + """ + Returns + Hash of our path as index items are uniquely identifyable by path, not + by their data ! + """ + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 6 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example. 
+ """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. + """ + return join_path_native(self.repo.working_tree_dir, self.path) + diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 87eed49b..948e9a54 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -9,12 +9,14 @@ import git.diff as diff import git.stats as stats from git.actor import Actor from tree import Tree +from cStringIO import StringIO import base import utils import time import os -class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): + +class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable): """ Wraps a git Commit object. @@ -91,7 +93,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): self._set_self_from_args_(locals()) if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) + cls = type(self) + self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls)) # END for each parent to convert if self.sha and tree is not None: @@ -109,20 +112,9 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): We set all values at once. """ if attr in Commit.__slots__: - # prepare our data lines to match rev-list - data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.sha) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.author_tz_offset = temp.author_tz_offset - self.committer = temp.committer - self.committed_date = temp.committed_date - self.committer_tz_offset = temp.committer_tz_offset - self.message = temp.message - self.encoding = temp.encoding + # read the data in a chunk, its faster - then provide a file wrapper + hexsha, typename, size, data = self.repo.git.get_object_data(self) + self._deserialize(StringIO(data)) else: super(Commit, self)._set_cache_(attr) @@ -260,59 +252,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): iterator returning Commit objects """ stream = proc_or_stream - if not hasattr(stream,'next'): + if not hasattr(stream,'readline'): stream = proc_or_stream.stdout - for line in stream: - commit_tokens = line.split() + while True: + line = stream.readline() + if not line: + break + commit_tokens = line.split() id = commit_tokens[1] assert commit_tokens[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line) - committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next()) - - # empty line - encoding = stream.next() - encoding.strip() - if encoding: - encoding = 
encoding[encoding.find(' ')+1:] - # END parse encoding - - message_lines = list() - if from_rev_list: - for msg_line in stream: - if not msg_line.startswith(' '): - # and forget about this empty marker - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - else: - # a stream from our data simply gives us the plain message - for msg_line in stream: - message_lines.append(msg_line) - # END message parsing - message = '\n'.join(message_lines) - - - yield Commit(repo, id, tree, - author, authored_date, author_tz_offset, - committer, committed_date, committer_tz_offset, - message, tuple(parents), - encoding or cls.default_encoding) + yield Commit(repo, id)._deserialize(stream, from_rev_list) # END for each line in stream @@ -393,7 +344,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): # assume utf8 encoding enc_section, enc_option = cls.conf_encoding.split('.') - conf_encoding = cr.get_value(enc_section, enc_option, default_encoding) + conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) author = Actor(author_name, author_email) committer = Actor(committer_name, committer_email) @@ -429,3 +380,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): def __repr__(self): return '' % self.sha + #{ Serializable Implementation + + def _serialize(self, stream): + # for now, this is very inefficient and in fact shouldn't be used like this + return super(Commit, self)._serialize(stream) + + def _deserialize(self, stream, from_rev_list=False): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + self.tree = Tree(self.repo, stream.readline().split()[1], 0, '') + + self.parents = list() + next_line = None + while True: + parent_line = stream.readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.repo, parent_line.split()[-1])) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline()) + + + # empty line + self.encoding = self.default_encoding + enc = stream.readline() + enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # END parse encoding + + message_lines = list() + if from_rev_list: + while True: + msg_line = stream.readline() + if not msg_line.startswith(' '): + # and forget about this empty marker + # cut the last newline to get rid of the artificial newline added + # by rev-list command. 
Lets hope its just linux style \n + message_lines[-1] = message_lines[-1][:-1] + break + # END abort message reading + # strip leading 4 spaces + message_lines.append(msg_line[4:]) + # END while there are message lines + self.message = ''.join(message_lines) + else: + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read()[:-1] + # END message parsing + return self + + #} END serializable implementation diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index a9e60981..285d3b5b 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -209,7 +209,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable): visit_once = False, ignore_self=1 ): """For documentation, see utils.Traversable.traverse - Trees are set to visist_once = False to gain more performance in the traversal""" + Trees are set to visit_once = False to gain more performance in the traversal""" return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) # List protocol diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 7060e293..6d378a72 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -280,3 +280,20 @@ class Traversable(object): addToStack( stack, item, branch_first, nd ) # END for each item on work stack + + +class Serializable(object): + """Defines methods to serialize and deserialize objects from and into a data stream""" + + def _serialize(self, stream): + """Serialize the data of this object into the given data stream + :note: a serialized object would ``_deserialize`` into the same objet + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") + + def _deserialize(self, stream): + """Deserialize all information regarding this object from the stream + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 48937c93..28b407ac 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -129,7 +129,7 @@ class TestCommit(TestBase): bisect_all=True) assert_true(git.called) - commits = Commit._iter_from_process_or_stream(self.rorepo, ListProcessAdapter(revs), True) + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs), True) expected_ids = ( 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', '33ebe7acec14b25c5f84f35a664803fcab2f7781', diff --git a/test/git/test_diff.py b/test/git/test_diff.py index 2f6a19bd..a113b992 100644 --- a/test/git/test_diff.py +++ b/test/git/test_diff.py @@ -20,7 +20,7 @@ class TestDiff(TestBase): return diffs def test_list_from_string_new_mode(self): - output = ListProcessAdapter(fixture('diff_new_mode')) + output = StringProcessAdapter(fixture('diff_new_mode')) diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) self._assert_diff_format(diffs) @@ -28,7 +28,7 @@ class TestDiff(TestBase): assert_equal(10, len(diffs[0].diff.splitlines())) def test_diff_with_rename(self): - output = ListProcessAdapter(fixture('diff_rename')) + output = StringProcessAdapter(fixture('diff_rename')) diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) self._assert_diff_format(diffs) @@ -47,7 +47,7 @@ class TestDiff(TestBase): "diff_tree_numstat_root" ) for fixture_name in fixtures: - diff_proc = ListProcessAdapter(fixture(fixture_name)) + diff_proc = 
StringProcessAdapter(fixture(fixture_name)) diffs = Diff._index_from_patch_format(self.rorepo, diff_proc.stdout) # END for each fixture diff --git a/test/git/test_repo.py b/test/git/test_repo.py index ce79402a..9316245b 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -48,6 +48,7 @@ class TestRepo(TestBase): def test_tree_from_revision(self): tree = self.rorepo.tree('0.1.6') + assert len(tree.sha) == 40 assert tree.type == "tree" assert self.rorepo.tree(tree) == tree @@ -56,9 +57,9 @@ class TestRepo(TestBase): @patch_object(Git, '_call_process') def test_commits(self, git): - git.return_value = ListProcessAdapter(fixture('rev_list')) + git.return_value = StringProcessAdapter(fixture('rev_list')) - commits = list( self.rorepo.iter_commits('master', max_count=10) ) + commits = list(self.rorepo.iter_commits('master', max_count=10)) c = commits[0] assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', c.sha) diff --git a/test/testlib/helper.py b/test/testlib/helper.py index 9c38ffd5..c9b4c2ac 100644 --- a/test/testlib/helper.py +++ b/test/testlib/helper.py @@ -9,6 +9,7 @@ from git import Repo, Remote, GitCommandError from unittest import TestCase import tempfile import shutil +import cStringIO GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) @@ -23,40 +24,13 @@ def absolute_project_path(): return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) -class ListProcessAdapter(object): - """Allows to use lists as Process object as returned by SubProcess.Popen. +class StringProcessAdapter(object): + """Allows to use strings as Process object as returned by SubProcess.Popen. Its tailored to work with the test system only""" - class Stream(object): - """Simple stream emulater meant to work only with tests""" - def __init__(self, data): - self.data = data - self.cur_iter = None - - def __iter__(self): - dat = self.data - if isinstance(dat, basestring): - dat = dat.splitlines() - if self.cur_iter is None: - self.cur_iter = iter(dat) - return self.cur_iter - - def read(self): - dat = self.data - if isinstance(dat, (tuple,list)): - dat = "\n".join(dat) - return dat - - def next(self): - if self.cur_iter is None: - self.cur_iter = iter(self) - return self.cur_iter.next() - - # END stream - - def __init__(self, input_list_or_string): - self.stdout = self.Stream(input_list_or_string) - self.stderr = self.Stream('') + def __init__(self, input_string): + self.stdout = cStringIO.StringIO(input_string) + self.stderr = cStringIO.StringIO() def wait(self): return 0 -- cgit v1.2.1
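The streaming interface added to lib/git/cmd.py above can be exercised end to end with a few lines of Python. The sketch below is illustrative only, not part of the patch: it assumes a GitPython checkout from around this commit, the repository path is hypothetical, and error handling is omitted::

    from git.cmd import Git

    g = Git("/path/to/repo")                      # hypothetical working tree

    # After the first call, the persistent 'git cat-file --batch-check'
    # process answers type/size queries without spawning a new git process.
    hexsha, typename, size = g.get_object_header("HEAD")

    # Stream the object body instead of loading it into memory at once.
    # The returned CatFileContentStream stops at the object boundary and
    # consumes the terminating newline, so the shared 'cat-file --batch'
    # process stays usable for the next query.
    hexsha, typename, size, stream = g.stream_object_data("HEAD")
    while True:
        chunk = stream.read(8192)
        if not chunk:
            break
        # ... process chunk ...

    # Convenience wrapper that reads the whole object for you
    # (not threadsafe, as noted in its docstring above).
    hexsha, typename, size, data = g.get_object_data("HEAD")
    assert len(data) == size

Reading exactly `size` bytes and then discarding the trailing newline (or seeking past any unread remainder in __del__) is what keeps the single long-lived cat-file process reusable across calls, which appears to be where the modest performance gain mentioned in the commit message comes from.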