author     Sebastian Thiel <byronimo@gmail.com>   2014-02-09 20:51:43 +0100
committer  Sebastian Thiel <byronimo@gmail.com>   2014-02-09 20:51:43 +0100
commit     6576d5503a64d124fd7bcf639cc8955918b3ac43 (patch)
tree       847028954b05307086eda1782c2e9521c8d67a13 /gitdb/db
parent     ea54328ce05abdcb4f23300df51422e62b737f63 (diff)
download   gitdb-6576d5503a64d124fd7bcf639cc8955918b3ac43.tar.gz
tabs to spaces
Diffstat (limited to 'gitdb/db')
-rw-r--r--  gitdb/db/base.py   586
-rw-r--r--  gitdb/db/git.py    134
-rw-r--r--  gitdb/db/loose.py  470
-rw-r--r--  gitdb/db/mem.py    188
-rw-r--r--  gitdb/db/pack.py   374
-rw-r--r--  gitdb/db/ref.py    138
6 files changed, 945 insertions, 945 deletions
diff --git a/gitdb/db/base.py b/gitdb/db/base.py
index 984acaf..867e93a 100644
--- a/gitdb/db/base.py
+++ b/gitdb/db/base.py
@@ -4,20 +4,20 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains implementations of database retrieveing objects"""
from gitdb.util import (
- pool,
- join,
- LazyMixin,
- hex_to_bin
- )
+ pool,
+ join,
+ LazyMixin,
+ hex_to_bin
+ )
from gitdb.exc import (
- BadObject,
- AmbiguousObjectName
- )
+ BadObject,
+ AmbiguousObjectName
+ )
from async import (
- ChannelThreadTask
- )
+ ChannelThreadTask
+ )
from itertools import chain
@@ -26,301 +26,301 @@ __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
class ObjectDBR(object):
- """Defines an interface for object database lookup.
- Objects are identified by their 20 byte binary sha"""
-
- def __contains__(self, sha):
- return self.has_object(sha)
-
- #{ Query Interface
- def has_object(self, sha):
- """
- :return: True if the object identified by the given 20 bytes
- binary sha is contained in the database"""
- raise NotImplementedError("To be implemented in subclass")
-
- def has_object_async(self, reader):
- """Return a reader yielding information about the membership of objects
- as identified by shas
- :param reader: Reader yielding 20 byte shas.
- :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
- whether the given sha exists in the database or not"""
- task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
- return pool.add_task(task)
-
- def info(self, sha):
- """ :return: OInfo instance
- :param sha: bytes binary sha
- :raise BadObject:"""
- raise NotImplementedError("To be implemented in subclass")
-
- def info_async(self, reader):
- """Retrieve information of a multitude of objects asynchronously
- :param reader: Channel yielding the sha's of the objects of interest
- :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
- task = ChannelThreadTask(reader, str(self.info_async), self.info)
- return pool.add_task(task)
-
- def stream(self, sha):
- """:return: OStream instance
- :param sha: 20 bytes binary sha
- :raise BadObject:"""
- raise NotImplementedError("To be implemented in subclass")
-
- def stream_async(self, reader):
- """Retrieve the OStream of multiple objects
- :param reader: see ``info``
- :param max_threads: see ``ObjectDBW.store``
- :return: async.Reader yielding OStream|InvalidOStream instances in any order
-
- **Note:** depending on the system configuration, it might not be possible to
- read all OStreams at once. Instead, read them individually using reader.read(x)
- where x is small enough."""
- # base implementation just uses the stream method repeatedly
- task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
- return pool.add_task(task)
-
- def size(self):
- """:return: amount of objects in this database"""
- raise NotImplementedError()
-
- def sha_iter(self):
- """Return iterator yielding 20 byte shas for all objects in this data base"""
- raise NotImplementedError()
-
- #} END query interface
-
-
+ """Defines an interface for object database lookup.
+ Objects are identified by their 20 byte binary sha"""
+
+ def __contains__(self, sha):
+ return self.has_object(sha)
+
+ #{ Query Interface
+ def has_object(self, sha):
+ """
+ :return: True if the object identified by the given 20 bytes
+ binary sha is contained in the database"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def has_object_async(self, reader):
+ """Return a reader yielding information about the membership of objects
+ as identified by shas
+ :param reader: Reader yielding 20 byte shas.
+ :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
+ whether the given sha exists in the database or not"""
+ task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
+ return pool.add_task(task)
+
+ def info(self, sha):
+ """ :return: OInfo instance
+ :param sha: bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info_async(self, reader):
+ """Retrieve information of a multitude of objects asynchronously
+ :param reader: Channel yielding the sha's of the objects of interest
+ :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
+ task = ChannelThreadTask(reader, str(self.info_async), self.info)
+ return pool.add_task(task)
+
+ def stream(self, sha):
+ """:return: OStream instance
+ :param sha: 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream_async(self, reader):
+ """Retrieve the OStream of multiple objects
+ :param reader: see ``info``
+ :param max_threads: see ``ObjectDBW.store``
+ :return: async.Reader yielding OStream|InvalidOStream instances in any order
+
+ **Note:** depending on the system configuration, it might not be possible to
+ read all OStreams at once. Instead, read them individually using reader.read(x)
+ where x is small enough."""
+ # base implementation just uses the stream method repeatedly
+ task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
+ return pool.add_task(task)
+
+ def size(self):
+ """:return: amount of objects in this database"""
+ raise NotImplementedError()
+
+ def sha_iter(self):
+ """Return iterator yielding 20 byte shas for all objects in this data base"""
+ raise NotImplementedError()
+
+ #} END query interface
+
+
class ObjectDBW(object):
- """Defines an interface to create objects in the database"""
-
- def __init__(self, *args, **kwargs):
- self._ostream = None
-
- #{ Edit Interface
- def set_ostream(self, stream):
- """
- Adjusts the stream to which all data should be sent when storing new objects
-
- :param stream: if not None, the stream to use, if None the default stream
- will be used.
- :return: previously installed stream, or None if there was no override
- :raise TypeError: if the stream doesn't have the supported functionality"""
- cstream = self._ostream
- self._ostream = stream
- return cstream
-
- def ostream(self):
- """
- :return: overridden output stream this instance will write to, or None
- if it will write to the default stream"""
- return self._ostream
-
- def store(self, istream):
- """
- Create a new object in the database
- :return: the input istream object with its sha set to its corresponding value
-
- :param istream: IStream compatible instance. If its sha is already set
- to a value, the object will just be stored in our database format,
- in which case the input stream is expected to be in object format (header + contents).
- :raise IOError: if data could not be written"""
- raise NotImplementedError("To be implemented in subclass")
-
- def store_async(self, reader):
- """
- Create multiple new objects in the database asynchronously. The method will
- return right away, returning an output channel which receives the results as
- they are computed.
-
- :return: Channel yielding your IStream which served as input, in any order.
- The IStreams sha will be set to the sha it received during the process,
- or its error attribute will be set to the exception informing about the error.
-
- :param reader: async.Reader yielding IStream instances.
- The same instances will be used in the output channel as were received
- in by the Reader.
-
- **Note:** As some ODB implementations implement this operation atomically, they might
- abort the whole operation if one item could not be processed. Hence check how
- many items have actually been produced."""
- # base implementation uses store to perform the work
- task = ChannelThreadTask(reader, str(self.store_async), self.store)
- return pool.add_task(task)
-
- #} END edit interface
-
+ """Defines an interface to create objects in the database"""
+
+ def __init__(self, *args, **kwargs):
+ self._ostream = None
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ """
+ Adjusts the stream to which all data should be sent when storing new objects
+
+ :param stream: if not None, the stream to use, if None the default stream
+ will be used.
+ :return: previously installed stream, or None if there was no override
+ :raise TypeError: if the stream doesn't have the supported functionality"""
+ cstream = self._ostream
+ self._ostream = stream
+ return cstream
+
+ def ostream(self):
+ """
+ :return: overridden output stream this instance will write to, or None
+ if it will write to the default stream"""
+ return self._ostream
+
+ def store(self, istream):
+ """
+ Create a new object in the database
+ :return: the input istream object with its sha set to its corresponding value
+
+ :param istream: IStream compatible instance. If its sha is already set
+ to a value, the object will just be stored in our database format,
+ in which case the input stream is expected to be in object format (header + contents).
+ :raise IOError: if data could not be written"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store_async(self, reader):
+ """
+ Create multiple new objects in the database asynchronously. The method will
+ return right away, returning an output channel which receives the results as
+ they are computed.
+
+ :return: Channel yielding your IStream which served as input, in any order.
+ The IStreams sha will be set to the sha it received during the process,
+ or its error attribute will be set to the exception informing about the error.
+
+ :param reader: async.Reader yielding IStream instances.
+ The same instances will be used in the output channel as were received
+ in by the Reader.
+
+ **Note:** As some ODB implementations implement this operation atomically, they might
+ abort the whole operation if one item could not be processed. Hence check how
+ many items have actually been produced."""
+ # base implementation uses store to perform the work
+ task = ChannelThreadTask(reader, str(self.store_async), self.store)
+ return pool.add_task(task)
+
+ #} END edit interface
+
class FileDBBase(object):
- """Provides basic facilities to retrieve files of interest, including
- caching facilities to help mapping hexsha's to objects"""
-
- def __init__(self, root_path):
- """Initialize this instance to look for its files at the given root path
- All subsequent operations will be relative to this path
- :raise InvalidDBRoot:
- **Note:** The base will not perform any accessibility checking as the base
- might not yet be accessible, but become accessible before the first
- access."""
- super(FileDBBase, self).__init__()
- self._root_path = root_path
-
-
- #{ Interface
- def root_path(self):
- """:return: path at which this db operates"""
- return self._root_path
-
- def db_path(self, rela_path):
- """
- :return: the given relative path relative to our database root, allowing
- to potentially access datafiles"""
- return join(self._root_path, rela_path)
- #} END interface
-
+ """Provides basic facilities to retrieve files of interest, including
+ caching facilities to help mapping hexsha's to objects"""
+
+ def __init__(self, root_path):
+ """Initialize this instance to look for its files at the given root path
+ All subsequent operations will be relative to this path
+ :raise InvalidDBRoot:
+ **Note:** The base will not perform any accessibility checking as the base
+ might not yet be accessible, but become accessible before the first
+ access."""
+ super(FileDBBase, self).__init__()
+ self._root_path = root_path
+
+
+ #{ Interface
+ def root_path(self):
+ """:return: path at which this db operates"""
+ return self._root_path
+
+ def db_path(self, rela_path):
+ """
+ :return: the given relative path relative to our database root, allowing
+ to potentially access datafiles"""
+ return join(self._root_path, rela_path)
+ #} END interface
+
class CachingDB(object):
- """A database which uses caches to speed-up access"""
-
- #{ Interface
- def update_cache(self, force=False):
- """
- Call this method if the underlying data changed to trigger an update
- of the internal caching structures.
-
- :param force: if True, the update must be performed. Otherwise the implementation
- may decide not to perform an update if it thinks nothing has changed.
- :return: True if an update was performed because something had indeed changed"""
-
- # END interface
+ """A database which uses caches to speed-up access"""
+
+ #{ Interface
+ def update_cache(self, force=False):
+ """
+ Call this method if the underlying data changed to trigger an update
+ of the internal caching structures.
+
+ :param force: if True, the update must be performed. Otherwise the implementation
+ may decide not to perform an update if it thinks nothing has changed.
+ :return: True if an update was performed because something had indeed changed"""
+
+ # END interface
def _databases_recursive(database, output):
- """Fill output list with database from db, in order. Deals with Loose, Packed
- and compound databases."""
- if isinstance(database, CompoundDB):
- compounds = list()
- dbs = database.databases()
- output.extend(db for db in dbs if not isinstance(db, CompoundDB))
- for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
- _databases_recursive(cdb, output)
- else:
- output.append(database)
- # END handle database type
-
+ """Fill output list with database from db, in order. Deals with Loose, Packed
+ and compound databases."""
+ if isinstance(database, CompoundDB):
+ compounds = list()
+ dbs = database.databases()
+ output.extend(db for db in dbs if not isinstance(db, CompoundDB))
+ for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
+ _databases_recursive(cdb, output)
+ else:
+ output.append(database)
+ # END handle database type
+
class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
- """A database which delegates calls to sub-databases.
-
- Databases are stored in the lazy-loaded _dbs attribute.
- Define _set_cache_ to update it with your databases"""
- def _set_cache_(self, attr):
- if attr == '_dbs':
- self._dbs = list()
- elif attr == '_db_cache':
- self._db_cache = dict()
- else:
- super(CompoundDB, self)._set_cache_(attr)
-
- def _db_query(self, sha):
- """:return: database containing the given 20 byte sha
- :raise BadObject:"""
- # most databases use binary representations, prevent converting
- # it everytime a database is being queried
- try:
- return self._db_cache[sha]
- except KeyError:
- pass
- # END first level cache
-
- for db in self._dbs:
- if db.has_object(sha):
- self._db_cache[sha] = db
- return db
- # END for each database
- raise BadObject(sha)
-
- #{ ObjectDBR interface
-
- def has_object(self, sha):
- try:
- self._db_query(sha)
- return True
- except BadObject:
- return False
- # END handle exceptions
-
- def info(self, sha):
- return self._db_query(sha).info(sha)
-
- def stream(self, sha):
- return self._db_query(sha).stream(sha)
+ """A database which delegates calls to sub-databases.
+
+ Databases are stored in the lazy-loaded _dbs attribute.
+ Define _set_cache_ to update it with your databases"""
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ elif attr == '_db_cache':
+ self._db_cache = dict()
+ else:
+ super(CompoundDB, self)._set_cache_(attr)
+
+ def _db_query(self, sha):
+ """:return: database containing the given 20 byte sha
+ :raise BadObject:"""
+ # most databases use binary representations, prevent converting
+ # it everytime a database is being queried
+ try:
+ return self._db_cache[sha]
+ except KeyError:
+ pass
+ # END first level cache
+
+ for db in self._dbs:
+ if db.has_object(sha):
+ self._db_cache[sha] = db
+ return db
+ # END for each database
+ raise BadObject(sha)
+
+ #{ ObjectDBR interface
+
+ def has_object(self, sha):
+ try:
+ self._db_query(sha)
+ return True
+ except BadObject:
+ return False
+ # END handle exceptions
+
+ def info(self, sha):
+ return self._db_query(sha).info(sha)
+
+ def stream(self, sha):
+ return self._db_query(sha).stream(sha)
- def size(self):
- """:return: total size of all contained databases"""
- return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
-
- def sha_iter(self):
- return chain(*(db.sha_iter() for db in self._dbs))
-
- #} END object DBR Interface
-
- #{ Interface
-
- def databases(self):
- """:return: tuple of database instances we use for lookups"""
- return tuple(self._dbs)
+ def size(self):
+ """:return: total size of all contained databases"""
+ return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
+
+ def sha_iter(self):
+ return chain(*(db.sha_iter() for db in self._dbs))
+
+ #} END object DBR Interface
+
+ #{ Interface
+
+ def databases(self):
+ """:return: tuple of database instances we use for lookups"""
+ return tuple(self._dbs)
- def update_cache(self, force=False):
- # something might have changed, clear everything
- self._db_cache.clear()
- stat = False
- for db in self._dbs:
- if isinstance(db, CachingDB):
- stat |= db.update_cache(force)
- # END if is caching db
- # END for each database to update
- return stat
-
- def partial_to_complete_sha_hex(self, partial_hexsha):
- """
- :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
- :param partial_hexsha: hexsha with less than 40 byte
- :raise AmbiguousObjectName: """
- databases = list()
- _databases_recursive(self, databases)
-
- len_partial_hexsha = len(partial_hexsha)
- if len_partial_hexsha % 2 != 0:
- partial_binsha = hex_to_bin(partial_hexsha + "0")
- else:
- partial_binsha = hex_to_bin(partial_hexsha)
- # END assure successful binary conversion
-
- candidate = None
- for db in databases:
- full_bin_sha = None
- try:
- if hasattr(db, 'partial_to_complete_sha_hex'):
- full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
- else:
- full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
- # END handle database type
- except BadObject:
- continue
- # END ignore bad objects
- if full_bin_sha:
- if candidate and candidate != full_bin_sha:
- raise AmbiguousObjectName(partial_hexsha)
- candidate = full_bin_sha
- # END handle candidate
- # END for each db
- if not candidate:
- raise BadObject(partial_binsha)
- return candidate
-
- #} END interface
-
+ def update_cache(self, force=False):
+ # something might have changed, clear everything
+ self._db_cache.clear()
+ stat = False
+ for db in self._dbs:
+ if isinstance(db, CachingDB):
+ stat |= db.update_cache(force)
+ # END if is caching db
+ # END for each database to update
+ return stat
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """
+ :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+ :param partial_hexsha: hexsha with less than 40 byte
+ :raise AmbiguousObjectName: """
+ databases = list()
+ _databases_recursive(self, databases)
+
+ len_partial_hexsha = len(partial_hexsha)
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+
+ candidate = None
+ for db in databases:
+ full_bin_sha = None
+ try:
+ if hasattr(db, 'partial_to_complete_sha_hex'):
+ full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
+ else:
+ full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+ # END handle database type
+ except BadObject:
+ continue
+ # END ignore bad objects
+ if full_bin_sha:
+ if candidate and candidate != full_bin_sha:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = full_bin_sha
+ # END handle candidate
+ # END for each db
+ if not candidate:
+ raise BadObject(partial_binsha)
+ return candidate
+
+ #} END interface
+
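For orientation, here is a minimal usage sketch of the ObjectDBR query interface defined above. It is not part of the commit: `db` stands for any concrete implementation from this directory and `binsha` for a 20 byte binary sha the caller already knows; both are assumptions for illustration.

    from gitdb.util import bin_to_hex

    # 'db' is any ObjectDBR implementation, 'binsha' a known 20 byte binary sha
    if db.has_object(binsha):            # equivalent to: binsha in db
        info = db.info(binsha)           # OInfo carries binsha, type and size
        hexsha = bin_to_hex(info.binsha)
        data = db.stream(binsha).read()  # OStream additionally exposes the object data
    # a missing object raises BadObject from info() / stream()
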
diff --git a/gitdb/db/git.py b/gitdb/db/git.py
index b8fc46a..1d6ad0f 100644
--- a/gitdb/db/git.py
+++ b/gitdb/db/git.py
@@ -3,10 +3,10 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import (
- CompoundDB,
- ObjectDBW,
- FileDBBase
- )
+ CompoundDB,
+ ObjectDBW,
+ FileDBBase
+ )
from loose import LooseObjectDB
from pack import PackedDB
@@ -14,72 +14,72 @@ from ref import ReferenceDB
from gitdb.util import LazyMixin
from gitdb.exc import (
- InvalidDBRoot,
- BadObject,
- AmbiguousObjectName
- )
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
import os
__all__ = ('GitDB', )
class GitDB(FileDBBase, ObjectDBW, CompoundDB):
- """A git-style object database, which contains all objects in the 'objects'
- subdirectory"""
- # Configuration
- PackDBCls = PackedDB
- LooseDBCls = LooseObjectDB
- ReferenceDBCls = ReferenceDB
-
- # Directories
- packs_dir = 'pack'
- loose_dir = ''
- alternates_dir = os.path.join('info', 'alternates')
-
- def __init__(self, root_path):
- """Initialize ourselves on a git objects directory"""
- super(GitDB, self).__init__(root_path)
-
- def _set_cache_(self, attr):
- if attr == '_dbs' or attr == '_loose_db':
- self._dbs = list()
- loose_db = None
- for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
- (self.loose_dir, self.LooseDBCls),
- (self.alternates_dir, self.ReferenceDBCls)):
- path = self.db_path(subpath)
- if os.path.exists(path):
- self._dbs.append(dbcls(path))
- if dbcls is self.LooseDBCls:
- loose_db = self._dbs[-1]
- # END remember loose db
- # END check path exists
- # END for each db type
-
- # should have at least one subdb
- if not self._dbs:
- raise InvalidDBRoot(self.root_path())
- # END handle error
-
- # the first one should have the store method
- assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"
-
- # finally set the value
- self._loose_db = loose_db
- else:
- super(GitDB, self)._set_cache_(attr)
- # END handle attrs
-
- #{ ObjectDBW interface
-
- def store(self, istream):
- return self._loose_db.store(istream)
-
- def ostream(self):
- return self._loose_db.ostream()
-
- def set_ostream(self, ostream):
- return self._loose_db.set_ostream(ostream)
-
- #} END objectdbw interface
-
+ """A git-style object database, which contains all objects in the 'objects'
+ subdirectory"""
+ # Configuration
+ PackDBCls = PackedDB
+ LooseDBCls = LooseObjectDB
+ ReferenceDBCls = ReferenceDB
+
+ # Directories
+ packs_dir = 'pack'
+ loose_dir = ''
+ alternates_dir = os.path.join('info', 'alternates')
+
+ def __init__(self, root_path):
+ """Initialize ourselves on a git objects directory"""
+ super(GitDB, self).__init__(root_path)
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs' or attr == '_loose_db':
+ self._dbs = list()
+ loose_db = None
+ for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+ (self.loose_dir, self.LooseDBCls),
+ (self.alternates_dir, self.ReferenceDBCls)):
+ path = self.db_path(subpath)
+ if os.path.exists(path):
+ self._dbs.append(dbcls(path))
+ if dbcls is self.LooseDBCls:
+ loose_db = self._dbs[-1]
+ # END remember loose db
+ # END check path exists
+ # END for each db type
+
+ # should have at least one subdb
+ if not self._dbs:
+ raise InvalidDBRoot(self.root_path())
+ # END handle error
+
+ # the first one should have the store method
+ assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"
+
+ # finally set the value
+ self._loose_db = loose_db
+ else:
+ super(GitDB, self)._set_cache_(attr)
+ # END handle attrs
+
+ #{ ObjectDBW interface
+
+ def store(self, istream):
+ return self._loose_db.store(istream)
+
+ def ostream(self):
+ return self._loose_db.ostream()
+
+ def set_ostream(self, ostream):
+ return self._loose_db.set_ostream(ostream)
+
+ #} END objectdbw interface
+
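A hedged usage sketch for the GitDB compound database above; the repository path is purely illustrative and the hex sha is a placeholder.

    import os
    from gitdb.db.git import GitDB
    from gitdb.util import hex_to_bin

    # point the database at a repository's object directory (example path)
    gdb = GitDB(os.path.join("/path/to/repo", ".git", "objects"))

    binsha = hex_to_bin("0123456789abcdef0123456789abcdef01234567")  # placeholder sha
    if gdb.has_object(binsha):
        info = gdb.info(binsha)   # served by the loose, packed or alternate sub-database
    # writes (store/ostream/set_ostream) are delegated to the loose object database
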
diff --git a/gitdb/db/loose.py b/gitdb/db/loose.py
index 6cd1cef..dc0ea0e 100644
--- a/gitdb/db/loose.py
+++ b/gitdb/db/loose.py
@@ -3,53 +3,53 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import (
- FileDBBase,
- ObjectDBR,
- ObjectDBW
- )
+ FileDBBase,
+ ObjectDBR,
+ ObjectDBW
+ )
from gitdb.exc import (
- InvalidDBRoot,
- BadObject,
- AmbiguousObjectName
- )
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
from gitdb.stream import (
- DecompressMemMapReader,
- FDCompressedSha1Writer,
- FDStream,
- Sha1Writer
- )
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ FDStream,
+ Sha1Writer
+ )
from gitdb.base import (
- OStream,
- OInfo
- )
+ OStream,
+ OInfo
+ )
from gitdb.util import (
- file_contents_ro_filepath,
- ENOENT,
- hex_to_bin,
- bin_to_hex,
- exists,
- chmod,
- isdir,
- isfile,
- remove,
- mkdir,
- rename,
- dirname,
- basename,
- join
- )
+ file_contents_ro_filepath,
+ ENOENT,
+ hex_to_bin,
+ bin_to_hex,
+ exists,
+ chmod,
+ isdir,
+ isfile,
+ remove,
+ mkdir,
+ rename,
+ dirname,
+ basename,
+ join
+ )
from gitdb.fun import (
- chunk_size,
- loose_object_header_info,
- write_object,
- stream_copy
- )
+ chunk_size,
+ loose_object_header_info,
+ write_object,
+ stream_copy
+ )
import tempfile
import mmap
@@ -61,202 +61,202 @@ __all__ = ( 'LooseObjectDB', )
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
- """A database which operates on loose object files"""
-
- # CONFIGURATION
- # chunks in which data will be copied between streams
- stream_chunk_size = chunk_size
-
- # On windows we need to keep it writable, otherwise it cannot be removed
- # either
- new_objects_mode = 0444
- if os.name == 'nt':
- new_objects_mode = 0644
-
-
- def __init__(self, root_path):
- super(LooseObjectDB, self).__init__(root_path)
- self._hexsha_to_file = dict()
- # Additional Flags - might be set to 0 after the first failure
- # Depending on the root, this might work for some mounts, for others not, which
- # is why it is per instance
- self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
-
- #{ Interface
- def object_path(self, hexsha):
- """
- :return: path at which the object with the given hexsha would be stored,
- relative to the database root"""
- return join(hexsha[:2], hexsha[2:])
-
- def readable_db_object_path(self, hexsha):
- """
- :return: readable object path to the object identified by hexsha
- :raise BadObject: If the object file does not exist"""
- try:
- return self._hexsha_to_file[hexsha]
- except KeyError:
- pass
- # END ignore cache misses
-
- # try filesystem
- path = self.db_path(self.object_path(hexsha))
- if exists(path):
- self._hexsha_to_file[hexsha] = path
- return path
- # END handle cache
- raise BadObject(hexsha)
-
- def partial_to_complete_sha_hex(self, partial_hexsha):
- """:return: 20 byte binary sha1 string which matches the given name uniquely
- :param partial_hexsha: hexadecimal partial name
- :raise AmbiguousObjectName:
- :raise BadObject: """
- candidate = None
- for binsha in self.sha_iter():
- if bin_to_hex(binsha).startswith(partial_hexsha):
- # it can't ever find the same object twice
- if candidate is not None:
- raise AmbiguousObjectName(partial_hexsha)
- candidate = binsha
- # END for each object
- if candidate is None:
- raise BadObject(partial_hexsha)
- return candidate
-
- #} END interface
-
- def _map_loose_object(self, sha):
- """
- :return: memory map of that file to allow random read access
- :raise BadObject: if object could not be located"""
- db_path = self.db_path(self.object_path(bin_to_hex(sha)))
- try:
- return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
- except OSError,e:
- if e.errno != ENOENT:
- # try again without noatime
- try:
- return file_contents_ro_filepath(db_path)
- except OSError:
- raise BadObject(sha)
- # didn't work because of our flag, don't try it again
- self._fd_open_flags = 0
- else:
- raise BadObject(sha)
- # END handle error
- # END exception handling
- try:
- return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
- finally:
- os.close(fd)
- # END assure file is closed
-
- def set_ostream(self, stream):
- """:raise TypeError: if the stream does not support the Sha1Writer interface"""
- if stream is not None and not isinstance(stream, Sha1Writer):
- raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
- return super(LooseObjectDB, self).set_ostream(stream)
-
- def info(self, sha):
- m = self._map_loose_object(sha)
- try:
- type, size = loose_object_header_info(m)
- return OInfo(sha, type, size)
- finally:
- m.close()
- # END assure release of system resources
-
- def stream(self, sha):
- m = self._map_loose_object(sha)
- type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
- return OStream(sha, type, size, stream)
-
- def has_object(self, sha):
- try:
- self.readable_db_object_path(bin_to_hex(sha))
- return True
- except BadObject:
- return False
- # END check existence
-
- def store(self, istream):
- """note: The sha we produce will be hex by nature"""
- tmp_path = None
- writer = self.ostream()
- if writer is None:
- # open a tmp file to write the data to
- fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
-
- if istream.binsha is None:
- writer = FDCompressedSha1Writer(fd)
- else:
- writer = FDStream(fd)
- # END handle direct stream copies
- # END handle custom writer
-
- try:
- try:
- if istream.binsha is not None:
- # copy as much as possible, the actual uncompressed item size might
- # be smaller than the compressed version
- stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
- else:
- # write object with header, we have to make a new one
- write_object(istream.type, istream.size, istream.read, writer.write,
- chunk_size=self.stream_chunk_size)
- # END handle direct stream copies
- finally:
- if tmp_path:
- writer.close()
- # END assure target stream is closed
- except:
- if tmp_path:
- os.remove(tmp_path)
- raise
- # END assure tmpfile removal on error
-
- hexsha = None
- if istream.binsha:
- hexsha = istream.hexsha
- else:
- hexsha = writer.sha(as_hex=True)
- # END handle sha
-
- if tmp_path:
- obj_path = self.db_path(self.object_path(hexsha))
- obj_dir = dirname(obj_path)
- if not isdir(obj_dir):
- mkdir(obj_dir)
- # END handle destination directory
- # rename onto existing doesn't work on windows
- if os.name == 'nt' and isfile(obj_path):
- remove(obj_path)
- # END handle win32
- rename(tmp_path, obj_path)
-
- # make sure it's readable for all! It started out as an rw------- tmp file
- # but needs to be world-readable
- chmod(obj_path, self.new_objects_mode)
- # END handle dry_run
-
- istream.binsha = hex_to_bin(hexsha)
- return istream
-
- def sha_iter(self):
- # find all files which look like an object, extract sha from there
- for root, dirs, files in os.walk(self.root_path()):
- root_base = basename(root)
- if len(root_base) != 2:
- continue
-
- for f in files:
- if len(f) != 38:
- continue
- yield hex_to_bin(root_base + f)
- # END for each file
- # END for each walk iteration
-
- def size(self):
- return len(tuple(self.sha_iter()))
-
+ """A database which operates on loose object files"""
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = chunk_size
+
+ # On windows we need to keep it writable, otherwise it cannot be removed
+ # either
+ new_objects_mode = 0444
+ if os.name == 'nt':
+ new_objects_mode = 0644
+
+
+ def __init__(self, root_path):
+ super(LooseObjectDB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+ #{ Interface
+ def object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
+ return join(hexsha[:2], hexsha[2:])
+
+ def readable_db_object_path(self, hexsha):
+ """
+ :return: readable object path to the object identified by hexsha
+ :raise BadObject: If the object file does not exist"""
+ try:
+ return self._hexsha_to_file[hexsha]
+ except KeyError:
+ pass
+ # END ignore cache misses
+
+ # try filesystem
+ path = self.db_path(self.object_path(hexsha))
+ if exists(path):
+ self._hexsha_to_file[hexsha] = path
+ return path
+ # END handle cache
+ raise BadObject(hexsha)
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: 20 byte binary sha1 string which matches the given name uniquely
+ :param partial_hexsha: hexadecimal partial name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for binsha in self.sha_iter():
+ if bin_to_hex(binsha).startswith(partial_hexsha):
+ # it can't ever find the same object twice
+ if candidate is not None:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = binsha
+ # END for each object
+ if candidate is None:
+ raise BadObject(partial_hexsha)
+ return candidate
+
+ #} END interface
+
+ def _map_loose_object(self, sha):
+ """
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+ try:
+ return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+ except OSError,e:
+ if e.errno != ENOENT:
+ # try again without noatime
+ try:
+ return file_contents_ro_filepath(db_path)
+ except OSError:
+ raise BadObject(sha)
+ # didn't work because of our flag, don't try it again
+ self._fd_open_flags = 0
+ else:
+ raise BadObject(sha)
+ # END handle error
+ # END exception handling
+ try:
+ return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ finally:
+ os.close(fd)
+ # END assure file is closed
+
+ def set_ostream(self, stream):
+ """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+ if stream is not None and not isinstance(stream, Sha1Writer):
+ raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+ return super(LooseObjectDB, self).set_ostream(stream)
+
+ def info(self, sha):
+ m = self._map_loose_object(sha)
+ try:
+ type, size = loose_object_header_info(m)
+ return OInfo(sha, type, size)
+ finally:
+ m.close()
+ # END assure release of system resources
+
+ def stream(self, sha):
+ m = self._map_loose_object(sha)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ return OStream(sha, type, size, stream)
+
+ def has_object(self, sha):
+ try:
+ self.readable_db_object_path(bin_to_hex(sha))
+ return True
+ except BadObject:
+ return False
+ # END check existence
+
+ def store(self, istream):
+ """note: The sha we produce will be hex by nature"""
+ tmp_path = None
+ writer = self.ostream()
+ if writer is None:
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+ if istream.binsha is None:
+ writer = FDCompressedSha1Writer(fd)
+ else:
+ writer = FDStream(fd)
+ # END handle direct stream copies
+ # END handle custom writer
+
+ try:
+ try:
+ if istream.binsha is not None:
+ # copy as much as possible, the actual uncompressed item size might
+ # be smaller than the compressed version
+ stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+ else:
+ # write object with header, we have to make a new one
+ write_object(istream.type, istream.size, istream.read, writer.write,
+ chunk_size=self.stream_chunk_size)
+ # END handle direct stream copies
+ finally:
+ if tmp_path:
+ writer.close()
+ # END assure target stream is closed
+ except:
+ if tmp_path:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+
+ hexsha = None
+ if istream.binsha:
+ hexsha = istream.hexsha
+ else:
+ hexsha = writer.sha(as_hex=True)
+ # END handle sha
+
+ if tmp_path:
+ obj_path = self.db_path(self.object_path(hexsha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ # rename onto existing doesn't work on windows
+ if os.name == 'nt' and isfile(obj_path):
+ remove(obj_path)
+ # END handle win32
+ rename(tmp_path, obj_path)
+
+ # make sure it's readable for all! It started out as an rw------- tmp file
+ # but needs to be world-readable
+ chmod(obj_path, self.new_objects_mode)
+ # END handle dry_run
+
+ istream.binsha = hex_to_bin(hexsha)
+ return istream
+
+ def sha_iter(self):
+ # find all files which look like an object, extract sha from there
+ for root, dirs, files in os.walk(self.root_path()):
+ root_base = basename(root)
+ if len(root_base) != 2:
+ continue
+
+ for f in files:
+ if len(f) != 38:
+ continue
+ yield hex_to_bin(root_base + f)
+ # END for each file
+ # END for each walk iteration
+
+ def size(self):
+ return len(tuple(self.sha_iter()))
+
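A small sketch of writing a loose object through the store() path above, using IStream from gitdb.base (which this module itself imports from); the target directory is illustrative and assumed to exist.

    from cStringIO import StringIO
    from gitdb.base import IStream
    from gitdb.db.loose import LooseObjectDB

    ldb = LooseObjectDB("/tmp/objects")           # example path, must exist
    data = "hello gitdb"
    istream = IStream("blob", len(data), StringIO(data))

    ldb.store(istream)                            # compresses, hashes and renames into place
    assert istream.binsha is not None             # store() fills in the 20 byte binary sha
    assert ldb.has_object(istream.binsha)
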
diff --git a/gitdb/db/mem.py b/gitdb/db/mem.py
index 5d76c83..b9b2b89 100644
--- a/gitdb/db/mem.py
+++ b/gitdb/db/mem.py
@@ -5,109 +5,109 @@
"""Contains the MemoryDatabase implementation"""
from loose import LooseObjectDB
from base import (
- ObjectDBR,
- ObjectDBW
- )
+ ObjectDBR,
+ ObjectDBW
+ )
from gitdb.base import (
- OStream,
- IStream,
- )
+ OStream,
+ IStream,
+ )
from gitdb.exc import (
- BadObject,
- UnsupportedOperation
- )
+ BadObject,
+ UnsupportedOperation
+ )
from gitdb.stream import (
- ZippedStoreShaWriter,
- DecompressMemMapReader,
- )
+ ZippedStoreShaWriter,
+ DecompressMemMapReader,
+ )
from cStringIO import StringIO
__all__ = ("MemoryDB", )
class MemoryDB(ObjectDBR, ObjectDBW):
- """A memory database stores everything to memory, providing fast IO and object
- retrieval. It should be used to buffer results and obtain SHAs before writing
- it to the actual physical storage, as it allows to query whether object already
- exists in the target storage before introducing actual IO
-
- **Note:** memory is currently not threadsafe, hence the async methods cannot be used
- for storing"""
-
- def __init__(self):
- super(MemoryDB, self).__init__()
- self._db = LooseObjectDB("path/doesnt/matter")
-
- # maps 20 byte shas to their OStream objects
- self._cache = dict()
-
- def set_ostream(self, stream):
- raise UnsupportedOperation("MemoryDB's always stream into memory")
-
- def store(self, istream):
- zstream = ZippedStoreShaWriter()
- self._db.set_ostream(zstream)
-
- istream = self._db.store(istream)
- zstream.close() # close to flush
- zstream.seek(0)
-
- # don't provide a size, the stream is written in object format, hence the
- # header needs decompression
- decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
- self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
-
- return istream
-
- def store_async(self, reader):
- raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access")
-
- def has_object(self, sha):
- return sha in self._cache
+ """A memory database stores everything to memory, providing fast IO and object
+ retrieval. It should be used to buffer results and obtain SHAs before writing
+ it to the actual physical storage, as it allows to query whether object already
+ exists in the target storage before introducing actual IO
+
+ **Note:** memory is currently not threadsafe, hence the async methods cannot be used
+ for storing"""
+
+ def __init__(self):
+ super(MemoryDB, self).__init__()
+ self._db = LooseObjectDB("path/doesnt/matter")
+
+ # maps 20 byte shas to their OStream objects
+ self._cache = dict()
+
+ def set_ostream(self, stream):
+ raise UnsupportedOperation("MemoryDB's always stream into memory")
+
+ def store(self, istream):
+ zstream = ZippedStoreShaWriter()
+ self._db.set_ostream(zstream)
+
+ istream = self._db.store(istream)
+ zstream.close() # close to flush
+ zstream.seek(0)
+
+ # don't provide a size, the stream is written in object format, hence the
+ # header needs decompression
+ decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+ self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
+
+ return istream
+
+ def store_async(self, reader):
+ raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access")
+
+ def has_object(self, sha):
+ return sha in self._cache
- def info(self, sha):
- # we always return streams, which are infos as well
- return self.stream(sha)
-
- def stream(self, sha):
- try:
- ostream = self._cache[sha]
- # rewind stream for the next one to read
- ostream.stream.seek(0)
- return ostream
- except KeyError:
- raise BadObject(sha)
- # END exception handling
-
- def size(self):
- return len(self._cache)
-
- def sha_iter(self):
- return self._cache.iterkeys()
-
-
- #{ Interface
- def stream_copy(self, sha_iter, odb):
- """Copy the streams as identified by sha's yielded by sha_iter into the given odb
- The streams will be copied directly
- **Note:** the object will only be written if it did not exist in the target db
- :return: amount of streams actually copied into odb. If smaller than the amount
- of input shas, one or more objects did already exist in odb"""
- count = 0
- for sha in sha_iter:
- if odb.has_object(sha):
- continue
- # END check object existence
-
- ostream = self.stream(sha)
- # compressed data including header
- sio = StringIO(ostream.stream.data())
- istream = IStream(ostream.type, ostream.size, sio, sha)
-
- odb.store(istream)
- count += 1
- # END for each sha
- return count
- #} END interface
+ def info(self, sha):
+ # we always return streams, which are infos as well
+ return self.stream(sha)
+
+ def stream(self, sha):
+ try:
+ ostream = self._cache[sha]
+ # rewind stream for the next one to read
+ ostream.stream.seek(0)
+ return ostream
+ except KeyError:
+ raise BadObject(sha)
+ # END exception handling
+
+ def size(self):
+ return len(self._cache)
+
+ def sha_iter(self):
+ return self._cache.iterkeys()
+
+
+ #{ Interface
+ def stream_copy(self, sha_iter, odb):
+ """Copy the streams as identified by sha's yielded by sha_iter into the given odb
+ The streams will be copied directly
+ **Note:** the object will only be written if it did not exist in the target db
+ :return: amount of streams actually copied into odb. If smaller than the amount
+ of input shas, one or more objects did already exist in odb"""
+ count = 0
+ for sha in sha_iter:
+ if odb.has_object(sha):
+ continue
+ # END check object existence
+
+ ostream = self.stream(sha)
+ # compressed data including header
+ sio = StringIO(ostream.stream.data())
+ istream = IStream(ostream.type, ostream.size, sio, sha)
+
+ odb.store(istream)
+ count += 1
+ # END for each sha
+ return count
+ #} END interface
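A sketch of the buffering workflow MemoryDB is meant for, as described in its docstring; `target_db` stands for any other object database offering store() and has_object() and is assumed to exist.

    from cStringIO import StringIO
    from gitdb.base import IStream
    from gitdb.db.mem import MemoryDB

    mdb = MemoryDB()
    data = "buffered payload"
    istream = mdb.store(IStream("blob", len(data), StringIO(data)))   # sha is known now

    # later: copy only the objects the target database does not have yet
    copied = mdb.stream_copy(mdb.sha_iter(), target_db)
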
diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py
index 4c9d0b9..9287319 100644
--- a/gitdb/db/pack.py
+++ b/gitdb/db/pack.py
@@ -4,18 +4,18 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing a database to deal with packs"""
from base import (
- FileDBBase,
- ObjectDBR,
- CachingDB
- )
+ FileDBBase,
+ ObjectDBR,
+ CachingDB
+ )
from gitdb.util import LazyMixin
from gitdb.exc import (
- BadObject,
- UnsupportedOperation,
- AmbiguousObjectName
- )
+ BadObject,
+ UnsupportedOperation,
+ AmbiguousObjectName
+ )
from gitdb.pack import PackEntity
@@ -28,182 +28,182 @@ __all__ = ('PackedDB', )
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
- """A database operating on a set of object packs"""
-
- # sort the priority list every N queries
- # Higher values are better, performance tests don't show this has
- # any effect, but it should have one
- _sort_interval = 500
-
- def __init__(self, root_path):
- super(PackedDB, self).__init__(root_path)
- # list of lists with three items:
- # * hits - number of times the pack was hit with a request
- # * entity - Pack entity instance
- # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
- # self._entities = list() # lazy loaded list
- self._hit_count = 0 # amount of hits
- self._st_mtime = 0 # last modification data of our root path
-
- def _set_cache_(self, attr):
- if attr == '_entities':
- self._entities = list()
- self.update_cache(force=True)
- # END handle entities initialization
-
- def _sort_entities(self):
- self._entities.sort(key=lambda l: l[0], reverse=True)
-
- def _pack_info(self, sha):
- """:return: tuple(entity, index) for an item at the given sha
- :param sha: 20 or 40 byte sha
- :raise BadObject:
- **Note:** This method is not thread-safe, but may be hit in multi-threaded
- operation. The worst thing that can happen though is a counter that
- was not incremented, or the list being in the wrong order. So we save
- the time for locking here; let's see how that goes"""
- # presort ?
- if self._hit_count % self._sort_interval == 0:
- self._sort_entities()
- # END update sorting
-
- for item in self._entities:
- index = item[2](sha)
- if index is not None:
- item[0] += 1 # one hit for you
- self._hit_count += 1 # general hit count
- return (item[1], index)
- # END index found in pack
- # END for each item
-
- # no hit, see whether we have to update packs
- # NOTE: considering packs don't change very often, we save this call
- # and leave it to the super-caller to trigger that
- raise BadObject(sha)
-
- #{ Object DB Read
-
- def has_object(self, sha):
- try:
- self._pack_info(sha)
- return True
- except BadObject:
- return False
- # END exception handling
-
- def info(self, sha):
- entity, index = self._pack_info(sha)
- return entity.info_at_index(index)
-
- def stream(self, sha):
- entity, index = self._pack_info(sha)
- return entity.stream_at_index(index)
-
- def sha_iter(self):
- sha_list = list()
- for entity in self.entities():
- index = entity.index()
- sha_by_index = index.sha
- for index in xrange(index.size()):
- yield sha_by_index(index)
- # END for each index
- # END for each entity
-
- def size(self):
- sizes = [item[1].index().size() for item in self._entities]
- return reduce(lambda x,y: x+y, sizes, 0)
-
- #} END object db read
-
- #{ object db write
-
- def store(self, istream):
- """Storing individual objects is not feasible as a pack is designed to
- hold multiple objects. Writing or rewriting packs for single objects is
- inefficient"""
- raise UnsupportedOperation()
-
- def store_async(self, reader):
- # TODO: add ObjectDBRW before implementing this
- raise NotImplementedError()
-
- #} END object db write
-
-
- #{ Interface
-
- def update_cache(self, force=False):
- """
- Update our cache with the actually existing packs on disk. Add new ones,
- and remove deleted ones. We keep the unchanged ones
-
- :param force: If True, the cache will be updated even though the directory
- does not appear to have changed according to its modification timestamp.
- :return: True if the packs have been updated so there is new information,
- False if there was no change to the pack database"""
- stat = os.stat(self.root_path())
- if not force and stat.st_mtime <= self._st_mtime:
- return False
- # END abort early on no change
- self._st_mtime = stat.st_mtime
-
- # packs are supposed to be prefixed with pack- by git-convention
- # get all pack files, figure out what changed
- pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
- our_pack_files = set(item[1].pack().path() for item in self._entities)
-
- # new packs
- for pack_file in (pack_files - our_pack_files):
- # init the hit-counter/priority with the size, a good measure for hit-
- # probability. It's implemented so that only 12 bytes will be read
- entity = PackEntity(pack_file)
- self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
- # END for each new packfile
-
- # removed packs
- for pack_file in (our_pack_files - pack_files):
- del_index = -1
- for i, item in enumerate(self._entities):
- if item[1].pack().path() == pack_file:
- del_index = i
- break
- # END found index
- # END for each entity
- assert del_index != -1
- del(self._entities[del_index])
- # END for each removed pack
-
- # reinitialize priorities
- self._sort_entities()
- return True
-
- def entities(self):
- """:return: list of pack entities operated upon by this database"""
- return [ item[1] for item in self._entities ]
-
- def partial_to_complete_sha(self, partial_binsha, canonical_length):
- """:return: 20 byte sha as inferred by the given partial binary sha
- :param partial_binsha: binary sha with less than 20 bytes
- :param canonical_length: length of the corresponding canonical representation.
- It is required as binary sha's cannot display whether the original hex sha
- had an odd or even number of characters
- :raise AmbiguousObjectName:
- :raise BadObject: """
- candidate = None
- for item in self._entities:
- item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
- if item_index is not None:
- sha = item[1].index().sha(item_index)
- if candidate and candidate != sha:
- raise AmbiguousObjectName(partial_binsha)
- candidate = sha
- # END handle full sha could be found
- # END for each entity
-
- if candidate:
- return candidate
-
- # still not found ?
- raise BadObject(partial_binsha)
-
- #} END interface
+ """A database operating on a set of object packs"""
+
+ # sort the priority list every N queries
+ # Higher values are better, performance tests don't show this has
+ # any effect, but it should have one
+ _sort_interval = 500
+
+ def __init__(self, root_path):
+ super(PackedDB, self).__init__(root_path)
+ # list of lists with three items:
+ # * hits - number of times the pack was hit with a request
+ # * entity - Pack entity instance
+ # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+ # self._entities = list() # lazy loaded list
+ self._hit_count = 0 # amount of hits
+ self._st_mtime = 0 # last modification data of our root path
+
+ def _set_cache_(self, attr):
+ if attr == '_entities':
+ self._entities = list()
+ self.update_cache(force=True)
+ # END handle entities initialization
+
+ def _sort_entities(self):
+ self._entities.sort(key=lambda l: l[0], reverse=True)
+
+ def _pack_info(self, sha):
+ """:return: tuple(entity, index) for an item at the given sha
+ :param sha: 20 or 40 byte sha
+ :raise BadObject:
+ **Note:** This method is not thread-safe, but may be hit in multi-threaded
+ operation. The worst thing that can happen though is a counter that
+ was not incremented, or the list being in the wrong order. So we save
+ the time for locking here; let's see how that goes"""
+ # presort ?
+ if self._hit_count % self._sort_interval == 0:
+ self._sort_entities()
+ # END update sorting
+
+ for item in self._entities:
+ index = item[2](sha)
+ if index is not None:
+ item[0] += 1 # one hit for you
+ self._hit_count += 1 # general hit count
+ return (item[1], index)
+ # END index found in pack
+ # END for each item
+
+ # no hit, see whether we have to update packs
+ # NOTE: considering packs don't change very often, we save this call
+ # and leave it to the super-caller to trigger that
+ raise BadObject(sha)
+
+ #{ Object DB Read
+
+ def has_object(self, sha):
+ try:
+ self._pack_info(sha)
+ return True
+ except BadObject:
+ return False
+ # END exception handling
+
+ def info(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.info_at_index(index)
+
+ def stream(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.stream_at_index(index)
+
+ def sha_iter(self):
+ sha_list = list()
+ for entity in self.entities():
+ index = entity.index()
+ sha_by_index = index.sha
+ for index in xrange(index.size()):
+ yield sha_by_index(index)
+ # END for each index
+ # END for each entity
+
+ def size(self):
+ sizes = [item[1].index().size() for item in self._entities]
+ return reduce(lambda x,y: x+y, sizes, 0)
+
+ #} END object db read
+
+ #{ object db write
+
+ def store(self, istream):
+ """Storing individual objects is not feasible as a pack is designed to
+ hold multiple objects. Writing or rewriting packs for single objects is
+ inefficient"""
+ raise UnsupportedOperation()
+
+ def store_async(self, reader):
+ # TODO: add ObjectDBRW before implementing this
+ raise NotImplementedError()
+
+ #} END object db write
+
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Update our cache with the actually existing packs on disk. Add new ones,
+ and remove deleted ones. We keep the unchanged ones
+
+ :param force: If True, the cache will be updated even though the directory
+ does not appear to have changed according to its modification timestamp.
+ :return: True if the packs have been updated so there is new information,
+ False if there was no change to the pack database"""
+ stat = os.stat(self.root_path())
+ if not force and stat.st_mtime <= self._st_mtime:
+ return False
+ # END abort early on no change
+ self._st_mtime = stat.st_mtime
+
+ # packs are supposed to be prefixed with pack- by git-convention
+ # get all pack files, figure out what changed
+ pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+ our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+ # new packs
+ for pack_file in (pack_files - our_pack_files):
+ # init the hit-counter/priority with the size, a good measure for hit-
+ # probability. It's implemented so that only 12 bytes will be read
+ entity = PackEntity(pack_file)
+ self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+ # END for each new packfile
+
+ # removed packs
+ for pack_file in (our_pack_files - pack_files):
+ del_index = -1
+ for i, item in enumerate(self._entities):
+ if item[1].pack().path() == pack_file:
+ del_index = i
+ break
+ # END found index
+ # END for each entity
+ assert del_index != -1
+ del(self._entities[del_index])
+ # END for each removed pack
+
+ # reinitialize priorities
+ self._sort_entities()
+ return True
+
+ def entities(self):
+ """:return: list of pack entities operated upon by this database"""
+ return [ item[1] for item in self._entities ]
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical representation.
+ It is required as binary sha's cannot display whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for item in self._entities:
+ item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+ if item_index is not None:
+ sha = item[1].index().sha(item_index)
+ if candidate and candidate != sha:
+ raise AmbiguousObjectName(partial_binsha)
+ candidate = sha
+ # END handle full sha could be found
+ # END for each entity
+
+ if candidate:
+ return candidate
+
+ # still not found ?
+ raise BadObject(partial_binsha)
+
+ #} END interface
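A read-only usage sketch for PackedDB; the pack directory path is illustrative and `binsha` is a hypothetical 20 byte binary sha obtained elsewhere.

    from gitdb.db.pack import PackedDB

    pdb = PackedDB("/path/to/repo/.git/objects/pack")   # example path
    pdb.update_cache(force=True)                         # scan pack-*.pack files

    if pdb.has_object(binsha):                           # binsha: 20 byte sha from elsewhere
        ostream = pdb.stream(binsha)                     # served straight from the pack
    # store()/store_async() intentionally raise, packs are read-only here
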
diff --git a/gitdb/db/ref.py b/gitdb/db/ref.py
index 8989843..60004a7 100644
--- a/gitdb/db/ref.py
+++ b/gitdb/db/ref.py
@@ -3,77 +3,77 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import (
- CompoundDB,
- )
+ CompoundDB,
+ )
import os
__all__ = ('ReferenceDB', )
class ReferenceDB(CompoundDB):
- """A database consisting of database referred to in a file"""
-
- # Configuration
- # Specifies the object database to use for the paths found in the alternates
- # file. If None, it defaults to the GitDB
- ObjectDBCls = None
-
- def __init__(self, ref_file):
- super(ReferenceDB, self).__init__()
- self._ref_file = ref_file
-
- def _set_cache_(self, attr):
- if attr == '_dbs':
- self._dbs = list()
- self._update_dbs_from_ref_file()
- else:
- super(ReferenceDB, self)._set_cache_(attr)
- # END handle attrs
-
- def _update_dbs_from_ref_file(self):
- dbcls = self.ObjectDBCls
- if dbcls is None:
- # late import
- from git import GitDB
- dbcls = GitDB
- # END get db type
-
- # try to get as many as possible, don't fail if some are unavailable
- ref_paths = list()
- try:
- ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
- except (OSError, IOError):
- pass
- # END handle alternates
-
- ref_paths_set = set(ref_paths)
- cur_ref_paths_set = set(db.root_path() for db in self._dbs)
-
- # remove existing
- for path in (cur_ref_paths_set - ref_paths_set):
- for i, db in enumerate(self._dbs[:]):
- if db.root_path() == path:
- del(self._dbs[i])
- continue
- # END del matching db
- # END for each path to remove
-
- # add new
- # sort them to maintain order
- added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
- for path in added_paths:
- try:
- db = dbcls(path)
- # force an update to verify path
- if isinstance(db, CompoundDB):
- db.databases()
- # END verification
- self._dbs.append(db)
- except Exception, e:
- # ignore invalid paths or issues
- pass
- # END for each path to add
-
- def update_cache(self, force=False):
- # re-read alternates and update databases
- self._update_dbs_from_ref_file()
- return super(ReferenceDB, self).update_cache(force)
+ """A database consisting of database referred to in a file"""
+
+ # Configuration
+ # Specifies the object database to use for the paths found in the alternates
+ # file. If None, it defaults to the GitDB
+ ObjectDBCls = None
+
+ def __init__(self, ref_file):
+ super(ReferenceDB, self).__init__()
+ self._ref_file = ref_file
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ self._update_dbs_from_ref_file()
+ else:
+ super(ReferenceDB, self)._set_cache_(attr)
+ # END handle attrs
+
+ def _update_dbs_from_ref_file(self):
+ dbcls = self.ObjectDBCls
+ if dbcls is None:
+ # late import
+ from git import GitDB
+ dbcls = GitDB
+ # END get db type
+
+ # try to get as many as possible, don't fail if some are unavailable
+ ref_paths = list()
+ try:
+ ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
+ except (OSError, IOError):
+ pass
+ # END handle alternates
+
+ ref_paths_set = set(ref_paths)
+ cur_ref_paths_set = set(db.root_path() for db in self._dbs)
+
+ # remove existing
+ for path in (cur_ref_paths_set - ref_paths_set):
+ for i, db in enumerate(self._dbs[:]):
+ if db.root_path() == path:
+ del(self._dbs[i])
+ continue
+ # END del matching db
+ # END for each path to remove
+
+ # add new
+ # sort them to maintain order
+ added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
+ for path in added_paths:
+ try:
+ db = dbcls(path)
+ # force an update to verify path
+ if isinstance(db, CompoundDB):
+ db.databases()
+ # END verification
+ self._dbs.append(db)
+ except Exception, e:
+ # ignore invalid paths or issues
+ pass
+ # END for each path to add
+
+ def update_cache(self, force=False):
+ # re-read alternates and update databases
+ self._update_dbs_from_ref_file()
+ return super(ReferenceDB, self).update_cache(force)
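Finally, a sketch of ReferenceDB resolving an alternates file; the path is illustrative, and each directory listed in the file becomes a GitDB unless ObjectDBCls is overridden.

    from gitdb.db.ref import ReferenceDB

    rdb = ReferenceDB("/path/to/repo/.git/objects/info/alternates")   # example path
    rdb.databases()        # lazily parses the file, one database per listed path
    rdb.update_cache()     # re-read the alternates file after it changed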