author     Sebastian Thiel <byronimo@gmail.com>   2014-02-09 20:51:43 +0100
committer  Sebastian Thiel <byronimo@gmail.com>   2014-02-09 20:51:43 +0100
commit     6576d5503a64d124fd7bcf639cc8955918b3ac43 (patch)
tree       847028954b05307086eda1782c2e9521c8d67a13 /gitdb/db
parent     ea54328ce05abdcb4f23300df51422e62b737f63 (diff)
download   gitdb-6576d5503a64d124fd7bcf639cc8955918b3ac43.tar.gz
tabs to spaces
Diffstat (limited to 'gitdb/db')
-rw-r--r--  gitdb/db/base.py   586
-rw-r--r--  gitdb/db/git.py    134
-rw-r--r--  gitdb/db/loose.py  470
-rw-r--r--  gitdb/db/mem.py    188
-rw-r--r--  gitdb/db/pack.py   374
-rw-r--r--  gitdb/db/ref.py    138
6 files changed, 945 insertions, 945 deletions
diff --git a/gitdb/db/base.py b/gitdb/db/base.py
index 984acaf..867e93a 100644
--- a/gitdb/db/base.py
+++ b/gitdb/db/base.py
@@ -4,20 +4,20 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains implementations of database retrieveing objects"""
from gitdb.util import (
- pool,
- join,
- LazyMixin,
- hex_to_bin
- )
+ pool,
+ join,
+ LazyMixin,
+ hex_to_bin
+ )
from gitdb.exc import (
- BadObject,
- AmbiguousObjectName
- )
+ BadObject,
+ AmbiguousObjectName
+ )
from async import (
- ChannelThreadTask
- )
+ ChannelThreadTask
+ )
from itertools import chain
@@ -26,301 +26,301 @@ __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
class ObjectDBR(object):
- """Defines an interface for object database lookup.
- Objects are identified by their 20 byte binary sha"""
-
- def __contains__(self, sha):
- return self.has_object(sha)
-
- #{ Query Interface
- def has_object(self, sha):
- """
- :return: True if the object identified by the given 20 bytes
- binary sha is contained in the database"""
- raise NotImplementedError("To be implemented in subclass")
-
- def has_object_async(self, reader):
- """Return a reader yielding information about the membership of objects
- as identified by shas
- :param reader: Reader yielding 20 byte shas.
- :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
- whether the given sha exists in the database or not"""
- task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
- return pool.add_task(task)
-
- def info(self, sha):
- """ :return: OInfo instance
- :param sha: bytes binary sha
- :raise BadObject:"""
- raise NotImplementedError("To be implemented in subclass")
-
- def info_async(self, reader):
- """Retrieve information of a multitude of objects asynchronously
- :param reader: Channel yielding the sha's of the objects of interest
- :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
- task = ChannelThreadTask(reader, str(self.info_async), self.info)
- return pool.add_task(task)
-
- def stream(self, sha):
- """:return: OStream instance
- :param sha: 20 bytes binary sha
- :raise BadObject:"""
- raise NotImplementedError("To be implemented in subclass")
-
- def stream_async(self, reader):
- """Retrieve the OStream of multiple objects
- :param reader: see ``info``
- :param max_threads: see ``ObjectDBW.store``
- :return: async.Reader yielding OStream|InvalidOStream instances in any order
-
- **Note:** depending on the system configuration, it might not be possible to
- read all OStreams at once. Instead, read them individually using reader.read(x)
- where x is small enough."""
- # base implementation just uses the stream method repeatedly
- task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
- return pool.add_task(task)
-
- def size(self):
- """:return: amount of objects in this database"""
- raise NotImplementedError()
-
- def sha_iter(self):
- """Return iterator yielding 20 byte shas for all objects in this data base"""
- raise NotImplementedError()
-
- #} END query interface
-
-
+ """Defines an interface for object database lookup.
+ Objects are identified by their 20 byte binary sha"""
+
+ def __contains__(self, sha):
+ return self.has_object(sha)
+
+ #{ Query Interface
+ def has_object(self, sha):
+ """
+ :return: True if the object identified by the given 20 bytes
+ binary sha is contained in the database"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def has_object_async(self, reader):
+ """Return a reader yielding information about the membership of objects
+ as identified by shas
+ :param reader: Reader yielding 20 byte shas.
+ :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
+ whether the given sha exists in the database or not"""
+ task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
+ return pool.add_task(task)
+
+ def info(self, sha):
+ """ :return: OInfo instance
+ :param sha: bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info_async(self, reader):
+ """Retrieve information of a multitude of objects asynchronously
+ :param reader: Channel yielding the sha's of the objects of interest
+ :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
+ task = ChannelThreadTask(reader, str(self.info_async), self.info)
+ return pool.add_task(task)
+
+ def stream(self, sha):
+ """:return: OStream instance
+ :param sha: 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream_async(self, reader):
+ """Retrieve the OStream of multiple objects
+ :param reader: see ``info``
+ :param max_threads: see ``ObjectDBW.store``
+ :return: async.Reader yielding OStream|InvalidOStream instances in any order
+
+ **Note:** depending on the system configuration, it might not be possible to
+ read all OStreams at once. Instead, read them individually using reader.read(x)
+ where x is small enough."""
+ # base implementation just uses the stream method repeatedly
+ task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
+ return pool.add_task(task)
+
+ def size(self):
+ """:return: amount of objects in this database"""
+ raise NotImplementedError()
+
+ def sha_iter(self):
+ """Return iterator yielding 20 byte shas for all objects in this data base"""
+ raise NotImplementedError()
+
+ #} END query interface
+
+
class ObjectDBW(object):
- """Defines an interface to create objects in the database"""
-
- def __init__(self, *args, **kwargs):
- self._ostream = None
-
- #{ Edit Interface
- def set_ostream(self, stream):
- """
- Adjusts the stream to which all data should be sent when storing new objects
-
- :param stream: if not None, the stream to use, if None the default stream
- will be used.
- :return: previously installed stream, or None if there was no override
- :raise TypeError: if the stream doesn't have the supported functionality"""
- cstream = self._ostream
- self._ostream = stream
- return cstream
-
- def ostream(self):
- """
- :return: overridden output stream this instance will write to, or None
- if it will write to the default stream"""
- return self._ostream
-
- def store(self, istream):
- """
- Create a new object in the database
- :return: the input istream object with its sha set to its corresponding value
-
- :param istream: IStream compatible instance. If its sha is already set
- to a value, the object will just be stored in our database format,
- in which case the input stream is expected to be in object format (header + contents).
- :raise IOError: if data could not be written"""
- raise NotImplementedError("To be implemented in subclass")
-
- def store_async(self, reader):
- """
- Create multiple new objects in the database asynchronously. The method will
- return right away, returning an output channel which receives the results as
- they are computed.
-
- :return: Channel yielding your IStream which served as input, in any order.
- The IStreams sha will be set to the sha it received during the process,
- or its error attribute will be set to the exception informing about the error.
-
- :param reader: async.Reader yielding IStream instances.
- The same instances will be used in the output channel as were received
- in by the Reader.
-
- **Note:** As some ODB implementations implement this operation atomically, they might
- abort the whole operation if one item could not be processed. Hence check how
- many items have actually been produced."""
- # base implementation uses store to perform the work
- task = ChannelThreadTask(reader, str(self.store_async), self.store)
- return pool.add_task(task)
-
- #} END edit interface
-
+ """Defines an interface to create objects in the database"""
+
+ def __init__(self, *args, **kwargs):
+ self._ostream = None
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ """
+ Adjusts the stream to which all data should be sent when storing new objects
+
+ :param stream: if not None, the stream to use, if None the default stream
+ will be used.
+ :return: previously installed stream, or None if there was no override
+ :raise TypeError: if the stream doesn't have the supported functionality"""
+ cstream = self._ostream
+ self._ostream = stream
+ return cstream
+
+ def ostream(self):
+ """
+ :return: overridden output stream this instance will write to, or None
+ if it will write to the default stream"""
+ return self._ostream
+
+ def store(self, istream):
+ """
+ Create a new object in the database
+ :return: the input istream object with its sha set to its corresponding value
+
+ :param istream: IStream compatible instance. If its sha is already set
+ to a value, the object will just be stored in our database format,
+ in which case the input stream is expected to be in object format (header + contents).
+ :raise IOError: if data could not be written"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store_async(self, reader):
+ """
+ Create multiple new objects in the database asynchronously. The method will
+ return right away, returning an output channel which receives the results as
+ they are computed.
+
+ :return: Channel yielding your IStream which served as input, in any order.
+ The IStreams sha will be set to the sha it received during the process,
+ or its error attribute will be set to the exception informing about the error.
+
+ :param reader: async.Reader yielding IStream instances.
+ The same instances will be used in the output channel as were received
+ in by the Reader.
+
+ **Note:** As some ODB implementations implement this operation atomically, they might
+ abort the whole operation if one item could not be processed. Hence check how
+ many items have actually been produced."""
+ # base implementation uses store to perform the work
+ task = ChannelThreadTask(reader, str(self.store_async), self.store)
+ return pool.add_task(task)
+
+ #} END edit interface
+
class FileDBBase(object):
- """Provides basic facilities to retrieve files of interest, including
- caching facilities to help mapping hexsha's to objects"""
-
- def __init__(self, root_path):
- """Initialize this instance to look for its files at the given root path
- All subsequent operations will be relative to this path
- :raise InvalidDBRoot:
- **Note:** The base will not perform any accessibility checking as the base
- might not yet be accessible, but become accessible before the first
- access."""
- super(FileDBBase, self).__init__()
- self._root_path = root_path
-
-
- #{ Interface
- def root_path(self):
- """:return: path at which this db operates"""
- return self._root_path
-
- def db_path(self, rela_path):
- """
- :return: the given relative path relative to our database root, allowing
- to potentially access datafiles"""
- return join(self._root_path, rela_path)
- #} END interface
-
+ """Provides basic facilities to retrieve files of interest, including
+ caching facilities to help mapping hexsha's to objects"""
+
+ def __init__(self, root_path):
+ """Initialize this instance to look for its files at the given root path
+ All subsequent operations will be relative to this path
+ :raise InvalidDBRoot:
+ **Note:** The base will not perform any accessibility checking as the base
+ might not yet be accessible, but become accessible before the first
+ access."""
+ super(FileDBBase, self).__init__()
+ self._root_path = root_path
+
+
+ #{ Interface
+ def root_path(self):
+ """:return: path at which this db operates"""
+ return self._root_path
+
+ def db_path(self, rela_path):
+ """
+ :return: the given relative path relative to our database root, allowing
+ to potentially access datafiles"""
+ return join(self._root_path, rela_path)
+ #} END interface
+
class CachingDB(object):
- """A database which uses caches to speed-up access"""
-
- #{ Interface
- def update_cache(self, force=False):
- """
- Call this method if the underlying data changed to trigger an update
- of the internal caching structures.
-
- :param force: if True, the update must be performed. Otherwise the implementation
- may decide not to perform an update if it thinks nothing has changed.
- :return: True if an update was performed because something had indeed changed"""
-
- # END interface
+ """A database which uses caches to speed-up access"""
+
+ #{ Interface
+ def update_cache(self, force=False):
+ """
+ Call this method if the underlying data changed to trigger an update
+ of the internal caching structures.
+
+ :param force: if True, the update must be performed. Otherwise the implementation
+ may decide not to perform an update if it thinks nothing has changed.
+ :return: True if an update was performed because something had indeed changed"""
+
+ # END interface
def _databases_recursive(database, output):
- """Fill output list with database from db, in order. Deals with Loose, Packed
- and compound databases."""
- if isinstance(database, CompoundDB):
- compounds = list()
- dbs = database.databases()
- output.extend(db for db in dbs if not isinstance(db, CompoundDB))
- for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
- _databases_recursive(cdb, output)
- else:
- output.append(database)
- # END handle database type
-
+ """Fill output list with database from db, in order. Deals with Loose, Packed
+ and compound databases."""
+ if isinstance(database, CompoundDB):
+ compounds = list()
+ dbs = database.databases()
+ output.extend(db for db in dbs if not isinstance(db, CompoundDB))
+ for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
+ _databases_recursive(cdb, output)
+ else:
+ output.append(database)
+ # END handle database type
+
class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
- """A database which delegates calls to sub-databases.
-
- Databases are stored in the lazy-loaded _dbs attribute.
- Define _set_cache_ to update it with your databases"""
- def _set_cache_(self, attr):
- if attr == '_dbs':
- self._dbs = list()
- elif attr == '_db_cache':
- self._db_cache = dict()
- else:
- super(CompoundDB, self)._set_cache_(attr)
-
- def _db_query(self, sha):
- """:return: database containing the given 20 byte sha
- :raise BadObject:"""
- # most databases use binary representations, prevent converting
- # it everytime a database is being queried
- try:
- return self._db_cache[sha]
- except KeyError:
- pass
- # END first level cache
-
- for db in self._dbs:
- if db.has_object(sha):
- self._db_cache[sha] = db
- return db
- # END for each database
- raise BadObject(sha)
-
- #{ ObjectDBR interface
-
- def has_object(self, sha):
- try:
- self._db_query(sha)
- return True
- except BadObject:
- return False
- # END handle exceptions
-
- def info(self, sha):
- return self._db_query(sha).info(sha)
-
- def stream(self, sha):
- return self._db_query(sha).stream(sha)
+ """A database which delegates calls to sub-databases.
+
+ Databases are stored in the lazy-loaded _dbs attribute.
+ Define _set_cache_ to update it with your databases"""
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ elif attr == '_db_cache':
+ self._db_cache = dict()
+ else:
+ super(CompoundDB, self)._set_cache_(attr)
+
+ def _db_query(self, sha):
+ """:return: database containing the given 20 byte sha
+ :raise BadObject:"""
+ # most databases use binary representations, prevent converting
+ # it everytime a database is being queried
+ try:
+ return self._db_cache[sha]
+ except KeyError:
+ pass
+ # END first level cache
+
+ for db in self._dbs:
+ if db.has_object(sha):
+ self._db_cache[sha] = db
+ return db
+ # END for each database
+ raise BadObject(sha)
+
+ #{ ObjectDBR interface
+
+ def has_object(self, sha):
+ try:
+ self._db_query(sha)
+ return True
+ except BadObject:
+ return False
+ # END handle exceptions
+
+ def info(self, sha):
+ return self._db_query(sha).info(sha)
+
+ def stream(self, sha):
+ return self._db_query(sha).stream(sha)
- def size(self):
- """:return: total size of all contained databases"""
- return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
-
- def sha_iter(self):
- return chain(*(db.sha_iter() for db in self._dbs))
-
- #} END object DBR Interface
-
- #{ Interface
-
- def databases(self):
- """:return: tuple of database instances we use for lookups"""
- return tuple(self._dbs)
+ def size(self):
+ """:return: total size of all contained databases"""
+ return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
+
+ def sha_iter(self):
+ return chain(*(db.sha_iter() for db in self._dbs))
+
+ #} END object DBR Interface
+
+ #{ Interface
+
+ def databases(self):
+ """:return: tuple of database instances we use for lookups"""
+ return tuple(self._dbs)
- def update_cache(self, force=False):
- # something might have changed, clear everything
- self._db_cache.clear()
- stat = False
- for db in self._dbs:
- if isinstance(db, CachingDB):
- stat |= db.update_cache(force)
- # END if is caching db
- # END for each database to update
- return stat
-
- def partial_to_complete_sha_hex(self, partial_hexsha):
- """
- :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
- :param partial_hexsha: hexsha with less than 40 byte
- :raise AmbiguousObjectName: """
- databases = list()
- _databases_recursive(self, databases)
-
- len_partial_hexsha = len(partial_hexsha)
- if len_partial_hexsha % 2 != 0:
- partial_binsha = hex_to_bin(partial_hexsha + "0")
- else:
- partial_binsha = hex_to_bin(partial_hexsha)
- # END assure successful binary conversion
-
- candidate = None
- for db in databases:
- full_bin_sha = None
- try:
- if hasattr(db, 'partial_to_complete_sha_hex'):
- full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
- else:
- full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
- # END handle database type
- except BadObject:
- continue
- # END ignore bad objects
- if full_bin_sha:
- if candidate and candidate != full_bin_sha:
- raise AmbiguousObjectName(partial_hexsha)
- candidate = full_bin_sha
- # END handle candidate
- # END for each db
- if not candidate:
- raise BadObject(partial_binsha)
- return candidate
-
- #} END interface
-
+ def update_cache(self, force=False):
+ # something might have changed, clear everything
+ self._db_cache.clear()
+ stat = False
+ for db in self._dbs:
+ if isinstance(db, CachingDB):
+ stat |= db.update_cache(force)
+ # END if is caching db
+ # END for each database to update
+ return stat
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """
+ :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+ :param partial_hexsha: hexsha with less than 40 byte
+ :raise AmbiguousObjectName: """
+ databases = list()
+ _databases_recursive(self, databases)
+
+ len_partial_hexsha = len(partial_hexsha)
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+
+ candidate = None
+ for db in databases:
+ full_bin_sha = None
+ try:
+ if hasattr(db, 'partial_to_complete_sha_hex'):
+ full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
+ else:
+ full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+ # END handle database type
+ except BadObject:
+ continue
+ # END ignore bad objects
+ if full_bin_sha:
+ if candidate and candidate != full_bin_sha:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = full_bin_sha
+ # END handle candidate
+ # END for each db
+ if not candidate:
+ raise BadObject(partial_binsha)
+ return candidate
+
+ #} END interface
+
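For orientation, here is a minimal usage sketch of the ObjectDBR query interface defined above. It is not part of the commit: `db` stands for any concrete implementation from this directory and `binsha` for a 20 byte binary sha the caller already knows; both are assumptions for illustration.

    from gitdb.util import bin_to_hex

    # 'db' is any ObjectDBR implementation, 'binsha' a known 20 byte binary sha
    if db.has_object(binsha):            # equivalent to: binsha in db
        info = db.info(binsha)           # OInfo carries binsha, type and size
        hexsha = bin_to_hex(info.binsha)
        data = db.stream(binsha).read()  # OStream additionally exposes the object data
    # a missing object raises BadObject from info() / stream()
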
diff --git a/gitdb/db/git.py b/gitdb/db/git.py
index b8fc46a..1d6ad0f 100644
--- a/gitdb/db/git.py
+++ b/gitdb/db/git.py
@@ -3,10 +3,10 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import (
- CompoundDB,
- ObjectDBW,
- FileDBBase
- )
+ CompoundDB,
+ ObjectDBW,
+ FileDBBase
+ )
from loose import LooseObjectDB
from pack import PackedDB
@@ -14,72 +14,72 @@ from ref import ReferenceDB
from gitdb.util import LazyMixin
from gitdb.exc import (
- InvalidDBRoot,
- BadObject,
- AmbiguousObjectName
- )
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
import os
__all__ = ('GitDB', )
class GitDB(FileDBBase, ObjectDBW, CompoundDB):
- """A git-style object database, which contains all objects in the 'objects'
- subdirectory"""
- # Configuration
- PackDBCls = PackedDB
- LooseDBCls = LooseObjectDB
- ReferenceDBCls = ReferenceDB
-
- # Directories
- packs_dir = 'pack'
- loose_dir = ''
- alternates_dir = os.path.join('info', 'alternates')
-
- def __init__(self, root_path):
- """Initialize ourselves on a git objects directory"""
- super(GitDB, self).__init__(root_path)
-
- def _set_cache_(self, attr):
- if attr == '_dbs' or attr == '_loose_db':
- self._dbs = list()
- loose_db = None
- for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
- (self.loose_dir, self.LooseDBCls),
- (self.alternates_dir, self.ReferenceDBCls)):
- path = self.db_path(subpath)
- if os.path.exists(path):
- self._dbs.append(dbcls(path))
- if dbcls is self.LooseDBCls:
- loose_db = self._dbs[-1]
- # END remember loose db
- # END check path exists
- # END for each db type
-
- # should have at least one subdb
- if not self._dbs:
- raise InvalidDBRoot(self.root_path())
- # END handle error
-
- # the first one should have the store method
- assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"
-
- # finally set the value
- self._loose_db = loose_db
- else:
- super(GitDB, self)._set_cache_(attr)
- # END handle attrs
-
- #{ ObjectDBW interface
-
- def store(self, istream):
- return self._loose_db.store(istream)
-
- def ostream(self):
- return self._loose_db.ostream()
-
- def set_ostream(self, ostream):
- return self._loose_db.set_ostream(ostream)
-
- #} END objectdbw interface
-
+ """A git-style object database, which contains all objects in the 'objects'
+ subdirectory"""
+ # Configuration
+ PackDBCls = PackedDB
+ LooseDBCls = LooseObjectDB
+ ReferenceDBCls = ReferenceDB
+
+ # Directories
+ packs_dir = 'pack'
+ loose_dir = ''
+ alternates_dir = os.path.join('info', 'alternates')
+
+ def __init__(self, root_path):
+ """Initialize ourselves on a git objects directory"""
+ super(GitDB, self).__init__(root_path)
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs' or attr == '_loose_db':
+ self._dbs = list()
+ loose_db = None
+ for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+ (self.loose_dir, self.LooseDBCls),
+ (self.alternates_dir, self.ReferenceDBCls)):
+ path = self.db_path(subpath)
+ if os.path.exists(path):
+ self._dbs.append(dbcls(path))
+ if dbcls is self.LooseDBCls:
+ loose_db = self._dbs[-1]
+ # END remember loose db
+ # END check path exists
+ # END for each db type
+
+ # should have at least one subdb
+ if not self._dbs:
+ raise InvalidDBRoot(self.root_path())
+ # END handle error
+
+ # the first one should have the store method
+ assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"
+
+ # finally set the value
+ self._loose_db = loose_db
+ else:
+ super(GitDB, self)._set_cache_(attr)
+ # END handle attrs
+
+ #{ ObjectDBW interface
+
+ def store(self, istream):
+ return self._loose_db.store(istream)
+
+ def ostream(self):
+ return self._loose_db.ostream()
+
+ def set_ostream(self, ostream):
+ return self._loose_db.set_ostream(ostream)
+
+ #} END objectdbw interface
+
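A hedged usage sketch for the GitDB compound database above; the repository path is purely illustrative and the hex sha is a placeholder.

    import os
    from gitdb.db.git import GitDB
    from gitdb.util import hex_to_bin

    # point the database at a repository's object directory (example path)
    gdb = GitDB(os.path.join("/path/to/repo", ".git", "objects"))

    binsha = hex_to_bin("0123456789abcdef0123456789abcdef01234567")  # placeholder sha
    if gdb.has_object(binsha):
        info = gdb.info(binsha)   # served by the loose, packed or alternate sub-database
    # writes (store/ostream/set_ostream) are delegated to the loose object database
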
diff --git a/gitdb/db/loose.py b/gitdb/db/loose.py
index 6cd1cef..dc0ea0e 100644
--- a/gitdb/db/loose.py
+++ b/gitdb/db/loose.py
@@ -3,53 +3,53 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import (
- FileDBBase,
- ObjectDBR,
- ObjectDBW
- )
+ FileDBBase,
+ ObjectDBR,
+ ObjectDBW
+ )
from gitdb.exc import (
- InvalidDBRoot,
- BadObject,
- AmbiguousObjectName
- )
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
from gitdb.stream import (
- DecompressMemMapReader,
- FDCompressedSha1Writer,
- FDStream,
- Sha1Writer
- )
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ FDStream,
+ Sha1Writer
+ )
from gitdb.base import (
- OStream,
- OInfo
- )
+ OStream,
+ OInfo
+ )
from gitdb.util import (
- file_contents_ro_filepath,
- ENOENT,
- hex_to_bin,
- bin_to_hex,
- exists,
- chmod,
- isdir,
- isfile,
- remove,
- mkdir,
- rename,
- dirname,
- basename,
- join
- )
+ file_contents_ro_filepath,
+ ENOENT,
+ hex_to_bin,
+ bin_to_hex,
+ exists,
+ chmod,
+ isdir,
+ isfile,
+ remove,
+ mkdir,
+ rename,
+ dirname,
+ basename,
+ join
+ )
from gitdb.fun import (
- chunk_size,
- loose_object_header_info,
- write_object,
- stream_copy
- )
+ chunk_size,
+ loose_object_header_info,
+ write_object,
+ stream_copy
+ )
import tempfile
import mmap
@@ -61,202 +61,202 @@ __all__ = ( 'LooseObjectDB', )
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
- """A database which operates on loose object files"""
-
- # CONFIGURATION
- # chunks in which data will be copied between streams
- stream_chunk_size = chunk_size
-
- # On windows we need to keep it writable, otherwise it cannot be removed
- # either
- new_objects_mode = 0444
- if os.name == 'nt':
- new_objects_mode = 0644
-
-
- def __init__(self, root_path):
- super(LooseObjectDB, self).__init__(root_path)
- self._hexsha_to_file = dict()
- # Additional Flags - might be set to 0 after the first failure
- # Depending on the root, this might work for some mounts, for others not, which
- # is why it is per instance
- self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
-
- #{ Interface
- def object_path(self, hexsha):
- """
- :return: path at which the object with the given hexsha would be stored,
- relative to the database root"""
- return join(hexsha[:2], hexsha[2:])
-
- def readable_db_object_path(self, hexsha):
- """
- :return: readable object path to the object identified by hexsha
- :raise BadObject: If the object file does not exist"""
- try:
- return self._hexsha_to_file[hexsha]
- except KeyError:
- pass
- # END ignore cache misses
-
- # try filesystem
- path = self.db_path(self.object_path(hexsha))
- if exists(path):
- self._hexsha_to_file[hexsha] = path
- return path
- # END handle cache
- raise BadObject(hexsha)
-
- def partial_to_complete_sha_hex(self, partial_hexsha):
- """:return: 20 byte binary sha1 string which matches the given name uniquely
- :param partial_hexsha: hexadecimal partial name
- :raise AmbiguousObjectName:
- :raise BadObject: """
- candidate = None
- for binsha in self.sha_iter():
- if bin_to_hex(binsha).startswith(partial_hexsha):
- # it can't ever find the same object twice
- if candidate is not None:
- raise AmbiguousObjectName(partial_hexsha)
- candidate = binsha
- # END for each object
- if candidate is None:
- raise BadObject(partial_hexsha)
- return candidate
-
- #} END interface
-
- def _map_loose_object(self, sha):
- """
- :return: memory map of that file to allow random read access
- :raise BadObject: if object could not be located"""
- db_path = self.db_path(self.object_path(bin_to_hex(sha)))
- try:
- return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
- except OSError,e:
- if e.errno != ENOENT:
- # try again without noatime
- try:
- return file_contents_ro_filepath(db_path)
- except OSError:
- raise BadObject(sha)
- # didn't work because of our flag, don't try it again
- self._fd_open_flags = 0
- else:
- raise BadObject(sha)
- # END handle error
- # END exception handling
- try:
- return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
- finally:
- os.close(fd)
- # END assure file is closed
-
- def set_ostream(self, stream):
- """:raise TypeError: if the stream does not support the Sha1Writer interface"""
- if stream is not None and not isinstance(stream, Sha1Writer):
- raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
- return super(LooseObjectDB, self).set_ostream(stream)
-
- def info(self, sha):
- m = self._map_loose_object(sha)
- try:
- type, size = loose_object_header_info(m)
- return OInfo(sha, type, size)
- finally:
- m.close()
- # END assure release of system resources
-
- def stream(self, sha):
- m = self._map_loose_object(sha)
- type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
- return OStream(sha, type, size, stream)
-
- def has_object(self, sha):
- try:
- self.readable_db_object_path(bin_to_hex(sha))
- return True
- except BadObject:
- return False
- # END check existence
-
- def store(self, istream):
- """note: The sha we produce will be hex by nature"""
- tmp_path = None
- writer = self.ostream()
- if writer is None:
- # open a tmp file to write the data to
- fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
-
- if istream.binsha is None:
- writer = FDCompressedSha1Writer(fd)
- else:
- writer = FDStream(fd)
- # END handle direct stream copies
- # END handle custom writer
-
- try:
- try:
- if istream.binsha is not None:
- # copy as much as possible, the actual uncompressed item size might
- # be smaller than the compressed version
- stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
- else:
- # write object with header, we have to make a new one
- write_object(istream.type, istream.size, istream.read, writer.write,
- chunk_size=self.stream_chunk_size)
- # END handle direct stream copies
- finally:
- if tmp_path:
- writer.close()
- # END assure target stream is closed
- except:
- if tmp_path:
- os.remove(tmp_path)
- raise
- # END assure tmpfile removal on error
-
- hexsha = None
- if istream.binsha:
- hexsha = istream.hexsha
- else:
- hexsha = writer.sha(as_hex=True)
- # END handle sha
-
- if tmp_path:
- obj_path = self.db_path(self.object_path(hexsha))
- obj_dir = dirname(obj_path)
- if not isdir(obj_dir):
- mkdir(obj_dir)
- # END handle destination directory
- # rename onto existing doesn't work on windows
- if os.name == 'nt' and isfile(obj_path):
- remove(obj_path)
- # END handle win32
- rename(tmp_path, obj_path)
-
- # make sure it's readable for all! It started out as an rw------- tmp file
- # but needs to be world-readable
- chmod(obj_path, self.new_objects_mode)
- # END handle dry_run
-
- istream.binsha = hex_to_bin(hexsha)
- return istream
-
- def sha_iter(self):
- # find all files which look like an object, extract sha from there
- for root, dirs, files in os.walk(self.root_path()):
- root_base = basename(root)
- if len(root_base) != 2:
- continue
-
- for f in files:
- if len(f) != 38:
- continue
- yield hex_to_bin(root_base + f)
- # END for each file
- # END for each walk iteration
-
- def size(self):
- return len(tuple(self.sha_iter()))
-
+ """A database which operates on loose object files"""
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = chunk_size
+
+ # On windows we need to keep it writable, otherwise it cannot be removed
+ # either
+ new_objects_mode = 0444
+ if os.name == 'nt':
+ new_objects_mode = 0644
+
+
+ def __init__(self, root_path):
+ super(LooseObjectDB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+ #{ Interface
+ def object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
+ return join(hexsha[:2], hexsha[2:])
+
+ def readable_db_object_path(self, hexsha):
+ """
+ :return: readable object path to the object identified by hexsha
+ :raise BadObject: If the object file does not exist"""
+ try:
+ return self._hexsha_to_file[hexsha]
+ except KeyError:
+ pass
+ # END ignore cache misses
+
+ # try filesystem
+ path = self.db_path(self.object_path(hexsha))
+ if exists(path):
+ self._hexsha_to_file[hexsha] = path
+ return path
+ # END handle cache
+ raise BadObject(hexsha)
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: 20 byte binary sha1 string which matches the given name uniquely
+ :param partial_hexsha: hexadecimal partial name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for binsha in self.sha_iter():
+ if bin_to_hex(binsha).startswith(partial_hexsha):
+ # it can't ever find the same object twice
+ if candidate is not None:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = binsha
+ # END for each object
+ if candidate is None:
+ raise BadObject(partial_hexsha)
+ return candidate
+
+ #} END interface
+
+ def _map_loose_object(self, sha):
+ """
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+ try:
+ return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+ except OSError,e:
+ if e.errno != ENOENT:
+ # try again without noatime
+ try:
+ return file_contents_ro_filepath(db_path)
+ except OSError:
+ raise BadObject(sha)
+ # didn't work because of our flag, don't try it again
+ self._fd_open_flags = 0
+ else:
+ raise BadObject(sha)
+ # END handle error
+ # END exception handling
+ try:
+ return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ finally:
+ os.close(fd)
+ # END assure file is closed
+
+ def set_ostream(self, stream):
+ """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+ if stream is not None and not isinstance(stream, Sha1Writer):
+ raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+ return super(LooseObjectDB, self).set_ostream(stream)
+
+ def info(self, sha):
+ m = self._map_loose_object(sha)
+ try:
+ type, size = loose_object_header_info(m)
+ return OInfo(sha, type, size)
+ finally:
+ m.close()
+ # END assure release of system resources
+
+ def stream(self, sha):
+ m = self._map_loose_object(sha)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ return OStream(sha, type, size, stream)
+
+ def has_object(self, sha):
+ try:
+ self.readable_db_object_path(bin_to_hex(sha))
+ return True
+ except BadObject:
+ return False
+ # END check existence
+
+ def store(self, istream):
+ """note: The sha we produce will be hex by nature"""
+ tmp_path = None
+ writer = self.ostream()
+ if writer is None:
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+ if istream.binsha is None:
+ writer = FDCompressedSha1Writer(fd)
+ else:
+ writer = FDStream(fd)
+ # END handle direct stream copies
+ # END handle custom writer
+
+ try:
+ try:
+ if istream.binsha is not None:
+ # copy as much as possible, the actual uncompressed item size might
+ # be smaller than the compressed version
+ stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+ else:
+ # write object with header, we have to make a new one
+ write_object(istream.type, istream.size, istream.read, writer.write,
+ chunk_size=self.stream_chunk_size)
+ # END handle direct stream copies
+ finally:
+ if tmp_path:
+ writer.close()
+ # END assure target stream is closed
+ except:
+ if tmp_path:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+
+ hexsha = None
+ if istream.binsha:
+ hexsha = istream.hexsha
+ else:
+ hexsha = writer.sha(as_hex=True)
+ # END handle sha
+
+ if tmp_path:
+ obj_path = self.db_path(self.object_path(hexsha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ # rename onto existing doesn't work on windows
+ if os.name == 'nt' and isfile(obj_path):
+ remove(obj_path)
+ # END handle win32
+ rename(tmp_path, obj_path)
+
+ # make sure it's readable for all! It started out as an rw------- tmp file
+ # but needs to be world-readable
+ chmod(obj_path, self.new_objects_mode)
+ # END handle dry_run
+
+ istream.binsha = hex_to_bin(hexsha)
+ return istream
+
+ def sha_iter(self):
+ # find all files which look like an object, extract sha from there
+ for root, dirs, files in os.walk(self.root_path()):
+ root_base = basename(root)
+ if len(root_base) != 2:
+ continue
+
+ for f in files:
+ if len(f) != 38:
+ continue
+ yield hex_to_bin(root_base + f)
+ # END for each file
+ # END for each walk iteration
+
+ def size(self):
+ return len(tuple(self.sha_iter()))
+
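A small sketch of writing a loose object through the store() path above, using IStream from gitdb.base (which this module itself imports from); the target directory is illustrative and assumed to exist.

    from cStringIO import StringIO
    from gitdb.base import IStream
    from gitdb.db.loose import LooseObjectDB

    ldb = LooseObjectDB("/tmp/objects")           # example path, must exist
    data = "hello gitdb"
    istream = IStream("blob", len(data), StringIO(data))

    ldb.store(istream)                            # compresses, hashes and renames into place
    assert istream.binsha is not None             # store() fills in the 20 byte binary sha
    assert ldb.has_object(istream.binsha)
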
diff --git a/gitdb/db/mem.py b/gitdb/db/mem.py
index 5d76c83..b9b2b89 100644
--- a/gitdb/db/mem.py
+++ b/gitdb/db/mem.py
@@ -5,109 +5,109 @@
"""Contains the MemoryDatabase implementation"""
from loose import LooseObjectDB
from base import (
- ObjectDBR,
- ObjectDBW
- )
+ ObjectDBR,
+ ObjectDBW
+ )
from gitdb.base import (
- OStream,
- IStream,
- )
+ OStream,
+ IStream,
+ )
from gitdb.exc import (
- BadObject,
- UnsupportedOperation
- )
+ BadObject,
+ UnsupportedOperation
+ )
from gitdb.stream import (
- ZippedStoreShaWriter,
- DecompressMemMapReader,
- )
+ ZippedStoreShaWriter,
+ DecompressMemMapReader,
+ )
from cStringIO import StringIO
__all__ = ("MemoryDB", )
class MemoryDB(ObjectDBR, ObjectDBW):
- """A memory database stores everything to memory, providing fast IO and object
- retrieval. It should be used to buffer results and obtain SHAs before writing
- it to the actual physical storage, as it allows to query whether object already
- exists in the target storage before introducing actual IO
-
- **Note:** memory is currently not threadsafe, hence the async methods cannot be used
- for storing"""
-
- def __init__(self):
- super(MemoryDB, self).__init__()
- self._db = LooseObjectDB("path/doesnt/matter")
-
- # maps 20 byte shas to their OStream objects
- self._cache = dict()
-
- def set_ostream(self, stream):
- raise UnsupportedOperation("MemoryDB's always stream into memory")
-
- def store(self, istream):
- zstream = ZippedStoreShaWriter()
- self._db.set_ostream(zstream)
-
- istream = self._db.store(istream)
- zstream.close() # close to flush
- zstream.seek(0)
-
- # don't provide a size, the stream is written in object format, hence the
- # header needs decompression
- decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
- self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
-
- return istream
-
- def store_async(self, reader):
- raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access")
-
- def has_object(self, sha):
- return sha in self._cache
+ """A memory database stores everything to memory, providing fast IO and object
+ retrieval. It should be used to buffer results and obtain SHAs before writing
+ it to the actual physical storage, as it allows to query whether object already
+ exists in the target storage before introducing actual IO
+
+ **Note:** memory is currently not threadsafe, hence the async methods cannot be used
+ for storing"""
+
+ def __init__(self):
+ super(MemoryDB, self).__init__()
+ self._db = LooseObjectDB("path/doesnt/matter")
+
+ # maps 20 byte shas to their OStream objects
+ self._cache = dict()
+
+ def set_ostream(self, stream):
+ raise UnsupportedOperation("MemoryDB's always stream into memory")
+
+ def store(self, istream):
+ zstream = ZippedStoreShaWriter()
+ self._db.set_ostream(zstream)
+
+ istream = self._db.store(istream)
+ zstream.close() # close to flush
+ zstream.seek(0)
+
+ # don't provide a size, the stream is written in object format, hence the
+ # header needs decompression
+ decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+ self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
+
+ return istream
+
+ def store_async(self, reader):
+ raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access")
+
+ def has_object(self, sha):
+ return sha in self._cache
- def info(self, sha):
- # we always return streams, which are infos as well
- return self.stream(sha)
-
- def stream(self, sha):
- try:
- ostream = self._cache[sha]
- # rewind stream for the next one to read
- ostream.stream.seek(0)
- return ostream
- except KeyError:
- raise BadObject(sha)
- # END exception handling
-
- def size(self):
- return len(self._cache)
-
- def sha_iter(self):
- return self._cache.iterkeys()
-
-
- #{ Interface
- def stream_copy(self, sha_iter, odb):
- """Copy the streams as identified by sha's yielded by sha_iter into the given odb
- The streams will be copied directly
- **Note:** the object will only be written if it did not exist in the target db
- :return: amount of streams actually copied into odb. If smaller than the amount
- of input shas, one or more objects did already exist in odb"""
- count = 0
- for sha in sha_iter:
- if odb.has_object(sha):
- continue
- # END check object existence
-
- ostream = self.stream(sha)
- # compressed data including header
- sio = StringIO(ostream.stream.data())
- istream = IStream(ostream.type, ostream.size, sio, sha)
-
- odb.store(istream)
- count += 1
- # END for each sha
- return count
- #} END interface
+ def info(self, sha):
+ # we always return streams, which are infos as well
+ return self.stream(sha)
+
+ def stream(self, sha):
+ try:
+ ostream = self._cache[sha]
+ # rewind stream for the next one to read
+ ostream.stream.seek(0)
+ return ostream
+ except KeyError:
+ raise BadObject(sha)
+ # END exception handling
+
+ def size(self):
+ return len(self._cache)
+
+ def sha_iter(self):
+ return self._cache.iterkeys()
+
+
+ #{ Interface
+ def stream_copy(self, sha_iter, odb):
+ """Copy the streams as identified by sha's yielded by sha_iter into the given odb
+ The streams will be copied directly
+ **Note:** the object will only be written if it did not exist in the target db
+ :return: amount of streams actually copied into odb. If smaller than the amount
+ of input shas, one or more objects did already exist in odb"""
+ count = 0
+ for sha in sha_iter:
+ if odb.has_object(sha):
+ continue
+ # END check object existence
+
+ ostream = self.stream(sha)
+ # compressed data including header
+ sio = StringIO(ostream.stream.data())
+ istream = IStream(ostream.type, ostream.size, sio, sha)
+
+ odb.store(istream)
+ count += 1
+ # END for each sha
+ return count
+ #} END interface
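A sketch of the buffering workflow MemoryDB is meant for, as described in its docstring; `target_db` stands for any other object database offering store() and has_object() and is assumed to exist.

    from cStringIO import StringIO
    from gitdb.base import IStream
    from gitdb.db.mem import MemoryDB

    mdb = MemoryDB()
    data = "buffered payload"
    istream = mdb.store(IStream("blob", len(data), StringIO(data)))   # sha is known now

    # later: copy only the objects the target database does not have yet
    copied = mdb.stream_copy(mdb.sha_iter(), target_db)
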
diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py
index 4c9d0b9..9287319 100644
--- a/gitdb/db/pack.py
+++ b/gitdb/db/pack.py
@@ -4,18 +4,18 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing a database to deal with packs"""
from base import (
- FileDBBase,
- ObjectDBR,
- CachingDB
- )
+ FileDBBase,
+ ObjectDBR,
+ CachingDB
+ )
from gitdb.util import LazyMixin
from gitdb.exc import (
- BadObject,
- UnsupportedOperation,
- AmbiguousObjectName
- )
+ BadObject,
+ UnsupportedOperation,
+ AmbiguousObjectName
+ )
from gitdb.pack import PackEntity
@@ -28,182 +28,182 @@ __all__ = ('PackedDB', )
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
- """A database operating on a set of object packs"""
-
- # sort the priority list every N queries
- # Higher values are better, performance tests don't show this has
- # any effect, but it should have one
- _sort_interval = 500
-
- def __init__(self, root_path):
- super(PackedDB, self).__init__(root_path)
- # list of lists with three items:
- # * hits - number of times the pack was hit with a request
- # * entity - Pack entity instance
- # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
- # self._entities = list() # lazy loaded list
- self._hit_count = 0 # amount of hits
- self._st_mtime = 0 # last modification data of our root path
-
- def _set_cache_(self, attr):
- if attr == '_entities':
- self._entities = list()
- self.update_cache(force=True)
- # END handle entities initialization
-
- def _sort_entities(self):
- self._entities.sort(key=lambda l: l[0], reverse=True)
-
- def _pack_info(self, sha):
- """:return: tuple(entity, index) for an item at the given sha
- :param sha: 20 or 40 byte sha
- :raise BadObject:
- **Note:** This method is not thread-safe, but may be hit in multi-threaded
- operation. The worst thing that can happen though is a counter that
- was not incremented, or the list being in the wrong order. So we save
- the time for locking here; let's see how that goes"""
- # presort ?
- if self._hit_count % self._sort_interval == 0:
- self._sort_entities()
- # END update sorting
-
- for item in self._entities:
- index = item[2](sha)
- if index is not None:
- item[0] += 1 # one hit for you
- self._hit_count += 1 # general hit count
- return (item[1], index)
- # END index found in pack
- # END for each item
-
- # no hit, see whether we have to update packs
- # NOTE: considering packs don't change very often, we save this call
- # and leave it to the super-caller to trigger that
- raise BadObject(sha)
-
- #{ Object DB Read
-
- def has_object(self, sha):
- try:
- self._pack_info(sha)
- return True
- except BadObject:
- return False
- # END exception handling
-
- def info(self, sha):
- entity, index = self._pack_info(sha)
- return entity.info_at_index(index)
-
- def stream(self, sha):
- entity, index = self._pack_info(sha)
- return entity.stream_at_index(index)
-
- def sha_iter(self):
- sha_list = list()
- for entity in self.entities():
- index = entity.index()
- sha_by_index = index.sha
- for index in xrange(index.size()):
- yield sha_by_index(index)
- # END for each index
- # END for each entity
-
- def size(self):
- sizes = [item[1].index().size() for item in self._entities]
- return reduce(lambda x,y: x+y, sizes, 0)
-
- #} END object db read
-
- #{ object db write
-
- def store(self, istream):
- """Storing individual objects is not feasible as a pack is designed to
- hold multiple objects. Writing or rewriting packs for single objects is
- inefficient"""
- raise UnsupportedOperation()
-
- def store_async(self, reader):
- # TODO: add ObjectDBRW before implementing this
- raise NotImplementedError()
-
- #} END object db write
-
-
- #{ Interface
-
- def update_cache(self, force=False):
- """
- Update our cache with the actually existing packs on disk. Add new ones,
- and remove deleted ones. We keep the unchanged ones
-
- :param force: If True, the cache will be updated even though the directory
- does not appear to have changed according to its modification timestamp.
- :return: True if the packs have been updated so there is new information,
- False if there was no change to the pack database"""
- stat = os.stat(self.root_path())
- if not force and stat.st_mtime <= self._st_mtime:
- return False
- # END abort early on no change
- self._st_mtime = stat.st_mtime
-
- # packs are supposed to be prefixed with pack- by git-convention
- # get all pack files, figure out what changed
- pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
- our_pack_files = set(item[1].pack().path() for item in self._entities)
-
- # new packs
- for pack_file in (pack_files - our_pack_files):
- # init the hit-counter/priority with the size, a good measure for hit-
- # probability. It's implemented so that only 12 bytes will be read
- entity = PackEntity(pack_file)
- self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
- # END for each new packfile
-
- # removed packs
- for pack_file in (our_pack_files - pack_files):
- del_index = -1
- for i, item in enumerate(self._entities):
- if item[1].pack().path() == pack_file:
- del_index = i
- break
- # END found index
- # END for each entity
- assert del_index != -1
- del(self._entities[del_index])
- # END for each removed pack
-
- # reinitialize priorities
- self._sort_entities()
- return True
-
- def entities(self):
- """:return: list of pack entities operated upon by this database"""
- return [ item[1] for item in self._entities ]
-
- def partial_to_complete_sha(self, partial_binsha, canonical_length):
- """:return: 20 byte sha as inferred by the given partial binary sha
- :param partial_binsha: binary sha with less than 20 bytes
- :param canonical_length: length of the corresponding canonical representation.
- It is required as binary sha's cannot display whether the original hex sha
- had an odd or even number of characters
- :raise AmbiguousObjectName:
- :raise BadObject: """
- candidate = None
- for item in self._entities:
- item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
- if item_index is not None:
- sha = item[1].index().sha(item_index)
- if candidate and candidate != sha:
- raise AmbiguousObjectName(partial_binsha)
- candidate = sha
- # END handle full sha could be found
- # END for each entity
-
- if candidate:
- return candidate
-
- # still not found ?
- raise BadObject(partial_binsha)
-
- #} END interface
+ """A database operating on a set of object packs"""
+
+ # sort the priority list every N queries
+ # Higher values are better, performance tests don't show this has
+ # any effect, but it should have one
+ _sort_interval = 500
+
+ def __init__(self, root_path):
+ super(PackedDB, self).__init__(root_path)
+ # list of lists with three items:
+ # * hits - number of times the pack was hit with a request
+ # * entity - Pack entity instance
+ # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+ # self._entities = list() # lazy loaded list
+ self._hit_count = 0 # amount of hits
+ self._st_mtime = 0 # last modification data of our root path
+
+ def _set_cache_(self, attr):
+ if attr == '_entities':
+ self._entities = list()
+ self.update_cache(force=True)
+ # END handle entities initialization
+
+ def _sort_entities(self):
+ self._entities.sort(key=lambda l: l[0], reverse=True)
+
+ def _pack_info(self, sha):
+ """:return: tuple(entity, index) for an item at the given sha
+ :param sha: 20 or 40 byte sha
+ :raise BadObject:
+ **Note:** This method is not thread-safe, but may be hit in multi-threaded
+ operation. The worst thing that can happen though is a counter that
+ was not incremented, or the list being in the wrong order. So we save
+ the time for locking here; let's see how that goes"""
+ # presort ?
+ if self._hit_count % self._sort_interval == 0:
+ self._sort_entities()
+ # END update sorting
+
+ for item in self._entities:
+ index = item[2](sha)
+ if index is not None:
+ item[0] += 1 # one hit for you
+ self._hit_count += 1 # general hit count
+ return (item[1], index)
+ # END index found in pack
+ # END for each item
+
+ # no hit, see whether we have to update packs
+ # NOTE: considering packs don't change very often, we save this call
+ # and leave it to the super-caller to trigger that
+ raise BadObject(sha)
+
+ #{ Object DB Read
+
+ def has_object(self, sha):
+ try:
+ self._pack_info(sha)
+ return True
+ except BadObject:
+ return False
+ # END exception handling
+
+ def info(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.info_at_index(index)
+
+ def stream(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.stream_at_index(index)
+
+ def sha_iter(self):
+ sha_list = list()
+ for entity in self.entities():
+ index = entity.index()
+ sha_by_index = index.sha
+ for index in xrange(index.size()):
+ yield sha_by_index(index)
+ # END for each index
+ # END for each entity
+
+ def size(self):
+ sizes = [item[1].index().size() for item in self._entities]
+ return reduce(lambda x,y: x+y, sizes, 0)
+
+ #} END object db read
+
+ #{ object db write
+
+ def store(self, istream):
+ """Storing individual objects is not feasible as a pack is designed to
+ hold multiple objects. Writing or rewriting packs for single objects is
+ inefficient"""
+ raise UnsupportedOperation()
+
+ def store_async(self, reader):
+ # TODO: add ObjectDBRW before implementing this
+ raise NotImplementedError()
+
+ #} END object db write
+
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Update our cache with the actually existing packs on disk. Add new ones,
+ and remove deleted ones. We keep the unchanged ones
+
+ :param force: If True, the cache will be updated even though the directory
+ does not appear to have changed according to its modification timestamp.
+ :return: True if the packs have been updated so there is new information,
+ False if there was no change to the pack database"""
+ stat = os.stat(self.root_path())
+ if not force and stat.st_mtime <= self._st_mtime:
+ return False
+ # END abort early on no change
+ self._st_mtime = stat.st_mtime
+
+ # packs are supposed to be prefixed with pack- by git-convention
+ # get all pack files, figure out what changed
+ pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+ our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+ # new packs
+ for pack_file in (pack_files - our_pack_files):
+ # init the hit-counter/priority with the size, a good measure for hit-
+ # probability. It's implemented so that only 12 bytes will be read
+ entity = PackEntity(pack_file)
+ self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+ # END for each new packfile
+
+ # removed packs
+ for pack_file in (our_pack_files - pack_files):
+ del_index = -1
+ for i, item in enumerate(self._entities):
+ if item[1].pack().path() == pack_file:
+ del_index = i
+ break
+ # END found index
+ # END for each entity
+ assert del_index != -1
+ del(self._entities[del_index])
+ # END for each removed pack
+
+ # reinitialize priorities
+ self._sort_entities()
+ return True
+
+ def entities(self):
+ """:return: list of pack entities operated upon by this database"""
+ return [ item[1] for item in self._entities ]
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical representation.
+ It is required as binary sha's cannot display whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for item in self._entities:
+ item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+ if item_index is not None:
+ sha = item[1].index().sha(item_index)
+ if candidate and candidate != sha:
+ raise AmbiguousObjectName(partial_binsha)
+ candidate = sha
+ # END handle full sha could be found
+ # END for each entity
+
+ if candidate:
+ return candidate
+
+ # still not found ?
+ raise BadObject(partial_binsha)
+
+ #} END interface
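A read-only usage sketch for PackedDB; the pack directory path is illustrative and `binsha` is a hypothetical 20 byte binary sha obtained elsewhere.

    from gitdb.db.pack import PackedDB

    pdb = PackedDB("/path/to/repo/.git/objects/pack")   # example path
    pdb.update_cache(force=True)                         # scan pack-*.pack files

    if pdb.has_object(binsha):                           # binsha: 20 byte sha from elsewhere
        ostream = pdb.stream(binsha)                     # served straight from the pack
    # store()/store_async() intentionally raise, packs are read-only here
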
diff --git a/gitdb/db/ref.py b/gitdb/db/ref.py
index 8989843..60004a7 100644
--- a/gitdb/db/ref.py
+++ b/gitdb/db/ref.py
@@ -3,77 +3,77 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import (
- CompoundDB,
- )
+ CompoundDB,
+ )
import os
__all__ = ('ReferenceDB', )
class ReferenceDB(CompoundDB):
- """A database consisting of database referred to in a file"""
-
- # Configuration
- # Specifies the object database to use for the paths found in the alternates
- # file. If None, it defaults to the GitDB
- ObjectDBCls = None
-
- def __init__(self, ref_file):
- super(ReferenceDB, self).__init__()
- self._ref_file = ref_file
-
- def _set_cache_(self, attr):
- if attr == '_dbs':
- self._dbs = list()
- self._update_dbs_from_ref_file()
- else:
- super(ReferenceDB, self)._set_cache_(attr)
- # END handle attrs
-
- def _update_dbs_from_ref_file(self):
- dbcls = self.ObjectDBCls
- if dbcls is None:
- # late import
- from git import GitDB
- dbcls = GitDB
- # END get db type
-
- # try to get as many as possible, don't fail if some are unavailable
- ref_paths = list()
- try:
- ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
- except (OSError, IOError):
- pass
- # END handle alternates
-
- ref_paths_set = set(ref_paths)
- cur_ref_paths_set = set(db.root_path() for db in self._dbs)
-
- # remove existing
- for path in (cur_ref_paths_set - ref_paths_set):
- for i, db in enumerate(self._dbs[:]):
- if db.root_path() == path:
- del(self._dbs[i])
- continue
- # END del matching db
- # END for each path to remove
-
- # add new
- # sort them to maintain order
- added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
- for path in added_paths:
- try:
- db = dbcls(path)
- # force an update to verify path
- if isinstance(db, CompoundDB):
- db.databases()
- # END verification
- self._dbs.append(db)
- except Exception, e:
- # ignore invalid paths or issues
- pass
- # END for each path to add
-
- def update_cache(self, force=False):
- # re-read alternates and update databases
- self._update_dbs_from_ref_file()
- return super(ReferenceDB, self).update_cache(force)
+ """A database consisting of database referred to in a file"""
+
+ # Configuration
+ # Specifies the object database to use for the paths found in the alternates
+ # file. If None, it defaults to the GitDB
+ ObjectDBCls = None
+
+ def __init__(self, ref_file):
+ super(ReferenceDB, self).__init__()
+ self._ref_file = ref_file
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ self._update_dbs_from_ref_file()
+ else:
+ super(ReferenceDB, self)._set_cache_(attr)
+ # END handle attrs
+
+ def _update_dbs_from_ref_file(self):
+ dbcls = self.ObjectDBCls
+ if dbcls is None:
+ # late import
+ from git import GitDB
+ dbcls = GitDB
+ # END get db type
+
+ # try to get as many as possible, don't fail if some are unavailable
+ ref_paths = list()
+ try:
+ ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
+ except (OSError, IOError):
+ pass
+ # END handle alternates
+
+ ref_paths_set = set(ref_paths)
+ cur_ref_paths_set = set(db.root_path() for db in self._dbs)
+
+ # remove existing
+ for path in (cur_ref_paths_set - ref_paths_set):
+ for i, db in enumerate(self._dbs[:]):
+ if db.root_path() == path:
+ del(self._dbs[i])
+ continue
+ # END del matching db
+ # END for each path to remove
+
+ # add new
+ # sort them to maintain order
+ added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
+ for path in added_paths:
+ try:
+ db = dbcls(path)
+ # force an update to verify path
+ if isinstance(db, CompoundDB):
+ db.databases()
+ # END verification
+ self._dbs.append(db)
+ except Exception, e:
+ # ignore invalid paths or issues
+ pass
+ # END for each path to add
+
+ def update_cache(self, force=False):
+ # re-read alternates and update databases
+ self._update_dbs_from_ref_file()
+ return super(ReferenceDB, self).update_cache(force)
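Finally, a sketch of ReferenceDB resolving an alternates file; the path is illustrative, and each directory listed in the file becomes a GitDB unless ObjectDBCls is overridden.

    from gitdb.db.ref import ReferenceDB

    rdb = ReferenceDB("/path/to/repo/.git/objects/info/alternates")   # example path
    rdb.databases()        # lazily parses the file, one database per listed path
    rdb.update_cache()     # re-read the alternates file after it changed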