summaryrefslogtreecommitdiff
path: root/gitdb/db/base.py
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2014-11-14 12:45:19 +0100
committerSebastian Thiel <byronimo@gmail.com>2014-11-14 12:45:19 +0100
commit2f2fe4eea8ba4f47e63a7392a1f27f74f5ee925d (patch)
tree176a493d114fab7cc6e930bf318b2339db386cf5 /gitdb/db/base.py
parent81707c606b88e971cc359e3e9f3abeeea2204860 (diff)
parent0dcec5a27b341ce58e5ab169f91aa25b2cafec0c (diff)
downloadgitdb-0.6.0.tar.gz
Merge branch 'py2n3'0.6.0
* python 3 compatibility * all tests work in py2.6, 2.7, 3.3, 3.4
Diffstat (limited to 'gitdb/db/base.py')
-rw-r--r--gitdb/db/base.py187
1 files changed, 66 insertions, 121 deletions
diff --git a/gitdb/db/base.py b/gitdb/db/base.py
index 867e93a..a670eea 100644
--- a/gitdb/db/base.py
+++ b/gitdb/db/base.py
@@ -4,22 +4,20 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains implementations of database retrieveing objects"""
from gitdb.util import (
- pool,
- join,
- LazyMixin,
- hex_to_bin
- )
+ join,
+ LazyMixin,
+ hex_to_bin
+)
+from gitdb.utils.encoding import force_text
from gitdb.exc import (
- BadObject,
- AmbiguousObjectName
- )
-
-from async import (
- ChannelThreadTask
- )
+ BadObject,
+ AmbiguousObjectName
+)
from itertools import chain
+from functools import reduce
+
__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
@@ -28,80 +26,51 @@ __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
class ObjectDBR(object):
"""Defines an interface for object database lookup.
Objects are identified either by their 20 byte bin sha"""
-
+
def __contains__(self, sha):
return self.has_obj
-
- #{ Query Interface
+
+ #{ Query Interface
def has_object(self, sha):
"""
:return: True if the object identified by the given 20 bytes
binary sha is contained in the database"""
raise NotImplementedError("To be implemented in subclass")
-
- def has_object_async(self, reader):
- """Return a reader yielding information about the membership of objects
- as identified by shas
- :param reader: Reader yielding 20 byte shas.
- :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
- whether the given sha exists in the database or not"""
- task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
- return pool.add_task(task)
-
+
def info(self, sha):
""" :return: OInfo instance
:param sha: bytes binary sha
:raise BadObject:"""
raise NotImplementedError("To be implemented in subclass")
-
- def info_async(self, reader):
- """Retrieve information of a multitude of objects asynchronously
- :param reader: Channel yielding the sha's of the objects of interest
- :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
- task = ChannelThreadTask(reader, str(self.info_async), self.info)
- return pool.add_task(task)
-
+
def stream(self, sha):
""":return: OStream instance
:param sha: 20 bytes binary sha
:raise BadObject:"""
raise NotImplementedError("To be implemented in subclass")
-
- def stream_async(self, reader):
- """Retrieve the OStream of multiple objects
- :param reader: see ``info``
- :param max_threads: see ``ObjectDBW.store``
- :return: async.Reader yielding OStream|InvalidOStream instances in any order
-
- **Note:** depending on the system configuration, it might not be possible to
- read all OStreams at once. Instead, read them individually using reader.read(x)
- where x is small enough."""
- # base implementation just uses the stream method repeatedly
- task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
- return pool.add_task(task)
-
+
def size(self):
""":return: amount of objects in this database"""
raise NotImplementedError()
-
+
def sha_iter(self):
"""Return iterator yielding 20 byte shas for all objects in this data base"""
raise NotImplementedError()
-
+
#} END query interface
-
-
+
+
class ObjectDBW(object):
"""Defines an interface to create objects in the database"""
-
+
def __init__(self, *args, **kwargs):
self._ostream = None
-
+
#{ Edit Interface
def set_ostream(self, stream):
"""
Adjusts the stream to which all data should be sent when storing new objects
-
+
:param stream: if not None, the stream to use, if None the default stream
will be used.
:return: previously installed stream, or None if there was no override
@@ -109,99 +78,77 @@ class ObjectDBW(object):
cstream = self._ostream
self._ostream = stream
return cstream
-
+
def ostream(self):
"""
:return: overridden output stream this instance will write to, or None
if it will write to the default stream"""
return self._ostream
-
+
def store(self, istream):
"""
Create a new object in the database
:return: the input istream object with its sha set to its corresponding value
-
- :param istream: IStream compatible instance. If its sha is already set
- to a value, the object will just be stored in the our database format,
+
+ :param istream: IStream compatible instance. If its sha is already set
+ to a value, the object will just be stored in the our database format,
in which case the input stream is expected to be in object format ( header + contents ).
:raise IOError: if data could not be written"""
raise NotImplementedError("To be implemented in subclass")
-
- def store_async(self, reader):
- """
- Create multiple new objects in the database asynchronously. The method will
- return right away, returning an output channel which receives the results as
- they are computed.
-
- :return: Channel yielding your IStream which served as input, in any order.
- The IStreams sha will be set to the sha it received during the process,
- or its error attribute will be set to the exception informing about the error.
-
- :param reader: async.Reader yielding IStream instances.
- The same instances will be used in the output channel as were received
- in by the Reader.
-
- **Note:** As some ODB implementations implement this operation atomic, they might
- abort the whole operation if one item could not be processed. Hence check how
- many items have actually been produced."""
- # base implementation uses store to perform the work
- task = ChannelThreadTask(reader, str(self.store_async), self.store)
- return pool.add_task(task)
-
+
#} END edit interface
-
+
class FileDBBase(object):
- """Provides basic facilities to retrieve files of interest, including
+ """Provides basic facilities to retrieve files of interest, including
caching facilities to help mapping hexsha's to objects"""
-
+
def __init__(self, root_path):
"""Initialize this instance to look for its files at the given root path
All subsequent operations will be relative to this path
- :raise InvalidDBRoot:
+ :raise InvalidDBRoot:
**Note:** The base will not perform any accessablity checking as the base
- might not yet be accessible, but become accessible before the first
+ might not yet be accessible, but become accessible before the first
access."""
super(FileDBBase, self).__init__()
self._root_path = root_path
-
-
- #{ Interface
+
+
+ #{ Interface
def root_path(self):
""":return: path at which this db operates"""
return self._root_path
-
+
def db_path(self, rela_path):
"""
- :return: the given relative path relative to our database root, allowing
+ :return: the given relative path relative to our database root, allowing
to pontentially access datafiles"""
- return join(self._root_path, rela_path)
+ return join(self._root_path, force_text(rela_path))
#} END interface
-
+
class CachingDB(object):
"""A database which uses caches to speed-up access"""
-
- #{ Interface
+
+ #{ Interface
def update_cache(self, force=False):
"""
Call this method if the underlying data changed to trigger an update
of the internal caching structures.
-
+
:param force: if True, the update must be performed. Otherwise the implementation
may decide not to perform an update if it thinks nothing has changed.
:return: True if an update was performed as something change indeed"""
-
+
# END interface
def _databases_recursive(database, output):
- """Fill output list with database from db, in order. Deals with Loose, Packed
+ """Fill output list with database from db, in order. Deals with Loose, Packed
and compound databases."""
if isinstance(database, CompoundDB):
- compounds = list()
dbs = database.databases()
output.extend(db for db in dbs if not isinstance(db, CompoundDB))
for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
@@ -209,11 +156,11 @@ def _databases_recursive(database, output):
else:
output.append(database)
# END handle database type
-
+
class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
"""A database which delegates calls to sub-databases.
-
+
Databases are stored in the lazy-loaded _dbs attribute.
Define _set_cache_ to update it with your databases"""
def _set_cache_(self, attr):
@@ -223,27 +170,27 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
self._db_cache = dict()
else:
super(CompoundDB, self)._set_cache_(attr)
-
+
def _db_query(self, sha):
""":return: database containing the given 20 byte sha
:raise BadObject:"""
- # most databases use binary representations, prevent converting
+ # most databases use binary representations, prevent converting
# it everytime a database is being queried
try:
return self._db_cache[sha]
except KeyError:
pass
# END first level cache
-
+
for db in self._dbs:
if db.has_object(sha):
self._db_cache[sha] = db
return db
# END for each database
raise BadObject(sha)
-
- #{ ObjectDBR interface
-
+
+ #{ ObjectDBR interface
+
def has_object(self, sha):
try:
self._db_query(sha)
@@ -251,24 +198,24 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
except BadObject:
return False
# END handle exceptions
-
+
def info(self, sha):
return self._db_query(sha).info(sha)
-
+
def stream(self, sha):
return self._db_query(sha).stream(sha)
def size(self):
""":return: total size of all contained databases"""
return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
-
+
def sha_iter(self):
return chain(*(db.sha_iter() for db in self._dbs))
-
+
#} END object DBR Interface
-
+
#{ Interface
-
+
def databases(self):
""":return: tuple of database instances we use for lookups"""
return tuple(self._dbs)
@@ -283,22 +230,22 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
# END if is caching db
# END for each database to update
return stat
-
+
def partial_to_complete_sha_hex(self, partial_hexsha):
"""
- :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+ :return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
:param partial_hexsha: hexsha with less than 40 byte
:raise AmbiguousObjectName: """
databases = list()
_databases_recursive(self, databases)
-
+ partial_hexsha = force_text(partial_hexsha)
len_partial_hexsha = len(partial_hexsha)
if len_partial_hexsha % 2 != 0:
partial_binsha = hex_to_bin(partial_hexsha + "0")
else:
partial_binsha = hex_to_bin(partial_hexsha)
- # END assure successful binary conversion
-
+ # END assure successful binary conversion
+
candidate = None
for db in databases:
full_bin_sha = None
@@ -320,7 +267,5 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
if not candidate:
raise BadObject(partial_binsha)
return candidate
-
- #} END interface
-
+ #} END interface