| field | value | date |
|---|---|---|
| author | Sebastian Thiel <byronimo@gmail.com> | 2014-02-09 20:51:43 +0100 |
| committer | Sebastian Thiel <byronimo@gmail.com> | 2014-02-09 20:51:43 +0100 |
| commit | 6576d5503a64d124fd7bcf639cc8955918b3ac43 (patch) | |
| tree | 847028954b05307086eda1782c2e9521c8d67a13 /gitdb/db | |
| parent | ea54328ce05abdcb4f23300df51422e62b737f63 (diff) | |
| download | gitdb-6576d5503a64d124fd7bcf639cc8955918b3ac43.tar.gz | |
tabs to spaces
Diffstat (limited to 'gitdb/db')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | gitdb/db/base.py | 586 |
| -rw-r--r-- | gitdb/db/git.py | 134 |
| -rw-r--r-- | gitdb/db/loose.py | 470 |
| -rw-r--r-- | gitdb/db/mem.py | 188 |
| -rw-r--r-- | gitdb/db/pack.py | 374 |
| -rw-r--r-- | gitdb/db/ref.py | 138 |
6 files changed, 945 insertions, 945 deletions
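This commit is a pure whitespace change: every removed line in the diff below reappears with its leading tabs expanded to spaces, which is why the insertion and deletion counts match exactly. As a rough illustration only (not necessarily the tool used for this commit), a conversion of this kind can be scripted as below; the tab width of 4 and the file glob are assumptions:

```python
# Illustrative sketch only -- the actual commit may have been produced with an
# editor setting or another tool. Tab width 4 and the glob are assumptions.
import glob

for path in glob.glob('gitdb/db/*.py'):
    with open(path) as fh:
        text = fh.read()
    # expandtabs() replaces every tab with spaces up to the next tab stop.
    with open(path, 'w') as fh:
        fh.write(text.expandtabs(4))
```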
diff --git a/gitdb/db/base.py b/gitdb/db/base.py index 984acaf..867e93a 100644 --- a/gitdb/db/base.py +++ b/gitdb/db/base.py @@ -4,20 +4,20 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains implementations of database retrieveing objects""" from gitdb.util import ( - pool, - join, - LazyMixin, - hex_to_bin - ) + pool, + join, + LazyMixin, + hex_to_bin + ) from gitdb.exc import ( - BadObject, - AmbiguousObjectName - ) + BadObject, + AmbiguousObjectName + ) from async import ( - ChannelThreadTask - ) + ChannelThreadTask + ) from itertools import chain @@ -26,301 +26,301 @@ __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB') class ObjectDBR(object): - """Defines an interface for object database lookup. - Objects are identified either by their 20 byte bin sha""" - - def __contains__(self, sha): - return self.has_obj - - #{ Query Interface - def has_object(self, sha): - """ - :return: True if the object identified by the given 20 bytes - binary sha is contained in the database""" - raise NotImplementedError("To be implemented in subclass") - - def has_object_async(self, reader): - """Return a reader yielding information about the membership of objects - as identified by shas - :param reader: Reader yielding 20 byte shas. - :return: async.Reader yielding tuples of (sha, bool) pairs which indicate - whether the given sha exists in the database or not""" - task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha))) - return pool.add_task(task) - - def info(self, sha): - """ :return: OInfo instance - :param sha: bytes binary sha - :raise BadObject:""" - raise NotImplementedError("To be implemented in subclass") - - def info_async(self, reader): - """Retrieve information of a multitude of objects asynchronously - :param reader: Channel yielding the sha's of the objects of interest - :return: async.Reader yielding OInfo|InvalidOInfo, in any order""" - task = ChannelThreadTask(reader, str(self.info_async), self.info) - return pool.add_task(task) - - def stream(self, sha): - """:return: OStream instance - :param sha: 20 bytes binary sha - :raise BadObject:""" - raise NotImplementedError("To be implemented in subclass") - - def stream_async(self, reader): - """Retrieve the OStream of multiple objects - :param reader: see ``info`` - :param max_threads: see ``ObjectDBW.store`` - :return: async.Reader yielding OStream|InvalidOStream instances in any order - - **Note:** depending on the system configuration, it might not be possible to - read all OStreams at once. Instead, read them individually using reader.read(x) - where x is small enough.""" - # base implementation just uses the stream method repeatedly - task = ChannelThreadTask(reader, str(self.stream_async), self.stream) - return pool.add_task(task) - - def size(self): - """:return: amount of objects in this database""" - raise NotImplementedError() - - def sha_iter(self): - """Return iterator yielding 20 byte shas for all objects in this data base""" - raise NotImplementedError() - - #} END query interface - - + """Defines an interface for object database lookup. 
+ Objects are identified either by their 20 byte bin sha""" + + def __contains__(self, sha): + return self.has_obj + + #{ Query Interface + def has_object(self, sha): + """ + :return: True if the object identified by the given 20 bytes + binary sha is contained in the database""" + raise NotImplementedError("To be implemented in subclass") + + def has_object_async(self, reader): + """Return a reader yielding information about the membership of objects + as identified by shas + :param reader: Reader yielding 20 byte shas. + :return: async.Reader yielding tuples of (sha, bool) pairs which indicate + whether the given sha exists in the database or not""" + task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha))) + return pool.add_task(task) + + def info(self, sha): + """ :return: OInfo instance + :param sha: bytes binary sha + :raise BadObject:""" + raise NotImplementedError("To be implemented in subclass") + + def info_async(self, reader): + """Retrieve information of a multitude of objects asynchronously + :param reader: Channel yielding the sha's of the objects of interest + :return: async.Reader yielding OInfo|InvalidOInfo, in any order""" + task = ChannelThreadTask(reader, str(self.info_async), self.info) + return pool.add_task(task) + + def stream(self, sha): + """:return: OStream instance + :param sha: 20 bytes binary sha + :raise BadObject:""" + raise NotImplementedError("To be implemented in subclass") + + def stream_async(self, reader): + """Retrieve the OStream of multiple objects + :param reader: see ``info`` + :param max_threads: see ``ObjectDBW.store`` + :return: async.Reader yielding OStream|InvalidOStream instances in any order + + **Note:** depending on the system configuration, it might not be possible to + read all OStreams at once. Instead, read them individually using reader.read(x) + where x is small enough.""" + # base implementation just uses the stream method repeatedly + task = ChannelThreadTask(reader, str(self.stream_async), self.stream) + return pool.add_task(task) + + def size(self): + """:return: amount of objects in this database""" + raise NotImplementedError() + + def sha_iter(self): + """Return iterator yielding 20 byte shas for all objects in this data base""" + raise NotImplementedError() + + #} END query interface + + class ObjectDBW(object): - """Defines an interface to create objects in the database""" - - def __init__(self, *args, **kwargs): - self._ostream = None - - #{ Edit Interface - def set_ostream(self, stream): - """ - Adjusts the stream to which all data should be sent when storing new objects - - :param stream: if not None, the stream to use, if None the default stream - will be used. - :return: previously installed stream, or None if there was no override - :raise TypeError: if the stream doesn't have the supported functionality""" - cstream = self._ostream - self._ostream = stream - return cstream - - def ostream(self): - """ - :return: overridden output stream this instance will write to, or None - if it will write to the default stream""" - return self._ostream - - def store(self, istream): - """ - Create a new object in the database - :return: the input istream object with its sha set to its corresponding value - - :param istream: IStream compatible instance. If its sha is already set - to a value, the object will just be stored in the our database format, - in which case the input stream is expected to be in object format ( header + contents ). 
- :raise IOError: if data could not be written""" - raise NotImplementedError("To be implemented in subclass") - - def store_async(self, reader): - """ - Create multiple new objects in the database asynchronously. The method will - return right away, returning an output channel which receives the results as - they are computed. - - :return: Channel yielding your IStream which served as input, in any order. - The IStreams sha will be set to the sha it received during the process, - or its error attribute will be set to the exception informing about the error. - - :param reader: async.Reader yielding IStream instances. - The same instances will be used in the output channel as were received - in by the Reader. - - **Note:** As some ODB implementations implement this operation atomic, they might - abort the whole operation if one item could not be processed. Hence check how - many items have actually been produced.""" - # base implementation uses store to perform the work - task = ChannelThreadTask(reader, str(self.store_async), self.store) - return pool.add_task(task) - - #} END edit interface - + """Defines an interface to create objects in the database""" + + def __init__(self, *args, **kwargs): + self._ostream = None + + #{ Edit Interface + def set_ostream(self, stream): + """ + Adjusts the stream to which all data should be sent when storing new objects + + :param stream: if not None, the stream to use, if None the default stream + will be used. + :return: previously installed stream, or None if there was no override + :raise TypeError: if the stream doesn't have the supported functionality""" + cstream = self._ostream + self._ostream = stream + return cstream + + def ostream(self): + """ + :return: overridden output stream this instance will write to, or None + if it will write to the default stream""" + return self._ostream + + def store(self, istream): + """ + Create a new object in the database + :return: the input istream object with its sha set to its corresponding value + + :param istream: IStream compatible instance. If its sha is already set + to a value, the object will just be stored in the our database format, + in which case the input stream is expected to be in object format ( header + contents ). + :raise IOError: if data could not be written""" + raise NotImplementedError("To be implemented in subclass") + + def store_async(self, reader): + """ + Create multiple new objects in the database asynchronously. The method will + return right away, returning an output channel which receives the results as + they are computed. + + :return: Channel yielding your IStream which served as input, in any order. + The IStreams sha will be set to the sha it received during the process, + or its error attribute will be set to the exception informing about the error. + + :param reader: async.Reader yielding IStream instances. + The same instances will be used in the output channel as were received + in by the Reader. + + **Note:** As some ODB implementations implement this operation atomic, they might + abort the whole operation if one item could not be processed. 
Hence check how + many items have actually been produced.""" + # base implementation uses store to perform the work + task = ChannelThreadTask(reader, str(self.store_async), self.store) + return pool.add_task(task) + + #} END edit interface + class FileDBBase(object): - """Provides basic facilities to retrieve files of interest, including - caching facilities to help mapping hexsha's to objects""" - - def __init__(self, root_path): - """Initialize this instance to look for its files at the given root path - All subsequent operations will be relative to this path - :raise InvalidDBRoot: - **Note:** The base will not perform any accessablity checking as the base - might not yet be accessible, but become accessible before the first - access.""" - super(FileDBBase, self).__init__() - self._root_path = root_path - - - #{ Interface - def root_path(self): - """:return: path at which this db operates""" - return self._root_path - - def db_path(self, rela_path): - """ - :return: the given relative path relative to our database root, allowing - to pontentially access datafiles""" - return join(self._root_path, rela_path) - #} END interface - + """Provides basic facilities to retrieve files of interest, including + caching facilities to help mapping hexsha's to objects""" + + def __init__(self, root_path): + """Initialize this instance to look for its files at the given root path + All subsequent operations will be relative to this path + :raise InvalidDBRoot: + **Note:** The base will not perform any accessablity checking as the base + might not yet be accessible, but become accessible before the first + access.""" + super(FileDBBase, self).__init__() + self._root_path = root_path + + + #{ Interface + def root_path(self): + """:return: path at which this db operates""" + return self._root_path + + def db_path(self, rela_path): + """ + :return: the given relative path relative to our database root, allowing + to pontentially access datafiles""" + return join(self._root_path, rela_path) + #} END interface + class CachingDB(object): - """A database which uses caches to speed-up access""" - - #{ Interface - def update_cache(self, force=False): - """ - Call this method if the underlying data changed to trigger an update - of the internal caching structures. - - :param force: if True, the update must be performed. Otherwise the implementation - may decide not to perform an update if it thinks nothing has changed. - :return: True if an update was performed as something change indeed""" - - # END interface + """A database which uses caches to speed-up access""" + + #{ Interface + def update_cache(self, force=False): + """ + Call this method if the underlying data changed to trigger an update + of the internal caching structures. + + :param force: if True, the update must be performed. Otherwise the implementation + may decide not to perform an update if it thinks nothing has changed. + :return: True if an update was performed as something change indeed""" + + # END interface def _databases_recursive(database, output): - """Fill output list with database from db, in order. Deals with Loose, Packed - and compound databases.""" - if isinstance(database, CompoundDB): - compounds = list() - dbs = database.databases() - output.extend(db for db in dbs if not isinstance(db, CompoundDB)) - for cdb in (db for db in dbs if isinstance(db, CompoundDB)): - _databases_recursive(cdb, output) - else: - output.append(database) - # END handle database type - + """Fill output list with database from db, in order. 
Deals with Loose, Packed + and compound databases.""" + if isinstance(database, CompoundDB): + compounds = list() + dbs = database.databases() + output.extend(db for db in dbs if not isinstance(db, CompoundDB)) + for cdb in (db for db in dbs if isinstance(db, CompoundDB)): + _databases_recursive(cdb, output) + else: + output.append(database) + # END handle database type + class CompoundDB(ObjectDBR, LazyMixin, CachingDB): - """A database which delegates calls to sub-databases. - - Databases are stored in the lazy-loaded _dbs attribute. - Define _set_cache_ to update it with your databases""" - def _set_cache_(self, attr): - if attr == '_dbs': - self._dbs = list() - elif attr == '_db_cache': - self._db_cache = dict() - else: - super(CompoundDB, self)._set_cache_(attr) - - def _db_query(self, sha): - """:return: database containing the given 20 byte sha - :raise BadObject:""" - # most databases use binary representations, prevent converting - # it everytime a database is being queried - try: - return self._db_cache[sha] - except KeyError: - pass - # END first level cache - - for db in self._dbs: - if db.has_object(sha): - self._db_cache[sha] = db - return db - # END for each database - raise BadObject(sha) - - #{ ObjectDBR interface - - def has_object(self, sha): - try: - self._db_query(sha) - return True - except BadObject: - return False - # END handle exceptions - - def info(self, sha): - return self._db_query(sha).info(sha) - - def stream(self, sha): - return self._db_query(sha).stream(sha) + """A database which delegates calls to sub-databases. + + Databases are stored in the lazy-loaded _dbs attribute. + Define _set_cache_ to update it with your databases""" + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + elif attr == '_db_cache': + self._db_cache = dict() + else: + super(CompoundDB, self)._set_cache_(attr) + + def _db_query(self, sha): + """:return: database containing the given 20 byte sha + :raise BadObject:""" + # most databases use binary representations, prevent converting + # it everytime a database is being queried + try: + return self._db_cache[sha] + except KeyError: + pass + # END first level cache + + for db in self._dbs: + if db.has_object(sha): + self._db_cache[sha] = db + return db + # END for each database + raise BadObject(sha) + + #{ ObjectDBR interface + + def has_object(self, sha): + try: + self._db_query(sha) + return True + except BadObject: + return False + # END handle exceptions + + def info(self, sha): + return self._db_query(sha).info(sha) + + def stream(self, sha): + return self._db_query(sha).stream(sha) - def size(self): - """:return: total size of all contained databases""" - return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0) - - def sha_iter(self): - return chain(*(db.sha_iter() for db in self._dbs)) - - #} END object DBR Interface - - #{ Interface - - def databases(self): - """:return: tuple of database instances we use for lookups""" - return tuple(self._dbs) + def size(self): + """:return: total size of all contained databases""" + return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0) + + def sha_iter(self): + return chain(*(db.sha_iter() for db in self._dbs)) + + #} END object DBR Interface + + #{ Interface + + def databases(self): + """:return: tuple of database instances we use for lookups""" + return tuple(self._dbs) - def update_cache(self, force=False): - # something might have changed, clear everything - self._db_cache.clear() - stat = False - for db in self._dbs: - if isinstance(db, 
CachingDB): - stat |= db.update_cache(force) - # END if is caching db - # END for each database to update - return stat - - def partial_to_complete_sha_hex(self, partial_hexsha): - """ - :return: 20 byte binary sha1 from the given less-than-40 byte hexsha - :param partial_hexsha: hexsha with less than 40 byte - :raise AmbiguousObjectName: """ - databases = list() - _databases_recursive(self, databases) - - len_partial_hexsha = len(partial_hexsha) - if len_partial_hexsha % 2 != 0: - partial_binsha = hex_to_bin(partial_hexsha + "0") - else: - partial_binsha = hex_to_bin(partial_hexsha) - # END assure successful binary conversion - - candidate = None - for db in databases: - full_bin_sha = None - try: - if hasattr(db, 'partial_to_complete_sha_hex'): - full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha) - else: - full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha) - # END handle database type - except BadObject: - continue - # END ignore bad objects - if full_bin_sha: - if candidate and candidate != full_bin_sha: - raise AmbiguousObjectName(partial_hexsha) - candidate = full_bin_sha - # END handle candidate - # END for each db - if not candidate: - raise BadObject(partial_binsha) - return candidate - - #} END interface - + def update_cache(self, force=False): + # something might have changed, clear everything + self._db_cache.clear() + stat = False + for db in self._dbs: + if isinstance(db, CachingDB): + stat |= db.update_cache(force) + # END if is caching db + # END for each database to update + return stat + + def partial_to_complete_sha_hex(self, partial_hexsha): + """ + :return: 20 byte binary sha1 from the given less-than-40 byte hexsha + :param partial_hexsha: hexsha with less than 40 byte + :raise AmbiguousObjectName: """ + databases = list() + _databases_recursive(self, databases) + + len_partial_hexsha = len(partial_hexsha) + if len_partial_hexsha % 2 != 0: + partial_binsha = hex_to_bin(partial_hexsha + "0") + else: + partial_binsha = hex_to_bin(partial_hexsha) + # END assure successful binary conversion + + candidate = None + for db in databases: + full_bin_sha = None + try: + if hasattr(db, 'partial_to_complete_sha_hex'): + full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha) + else: + full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha) + # END handle database type + except BadObject: + continue + # END ignore bad objects + if full_bin_sha: + if candidate and candidate != full_bin_sha: + raise AmbiguousObjectName(partial_hexsha) + candidate = full_bin_sha + # END handle candidate + # END for each db + if not candidate: + raise BadObject(partial_binsha) + return candidate + + #} END interface + diff --git a/gitdb/db/git.py b/gitdb/db/git.py index b8fc46a..1d6ad0f 100644 --- a/gitdb/db/git.py +++ b/gitdb/db/git.py @@ -3,10 +3,10 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from base import ( - CompoundDB, - ObjectDBW, - FileDBBase - ) + CompoundDB, + ObjectDBW, + FileDBBase + ) from loose import LooseObjectDB from pack import PackedDB @@ -14,72 +14,72 @@ from ref import ReferenceDB from gitdb.util import LazyMixin from gitdb.exc import ( - InvalidDBRoot, - BadObject, - AmbiguousObjectName - ) + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) import os __all__ = ('GitDB', ) class GitDB(FileDBBase, ObjectDBW, CompoundDB): - """A git-style object database, which contains all objects in the 'objects' - subdirectory""" - # 
Configuration - PackDBCls = PackedDB - LooseDBCls = LooseObjectDB - ReferenceDBCls = ReferenceDB - - # Directories - packs_dir = 'pack' - loose_dir = '' - alternates_dir = os.path.join('info', 'alternates') - - def __init__(self, root_path): - """Initialize ourselves on a git objects directory""" - super(GitDB, self).__init__(root_path) - - def _set_cache_(self, attr): - if attr == '_dbs' or attr == '_loose_db': - self._dbs = list() - loose_db = None - for subpath, dbcls in ((self.packs_dir, self.PackDBCls), - (self.loose_dir, self.LooseDBCls), - (self.alternates_dir, self.ReferenceDBCls)): - path = self.db_path(subpath) - if os.path.exists(path): - self._dbs.append(dbcls(path)) - if dbcls is self.LooseDBCls: - loose_db = self._dbs[-1] - # END remember loose db - # END check path exists - # END for each db type - - # should have at least one subdb - if not self._dbs: - raise InvalidDBRoot(self.root_path()) - # END handle error - - # we the first one should have the store method - assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" - - # finally set the value - self._loose_db = loose_db - else: - super(GitDB, self)._set_cache_(attr) - # END handle attrs - - #{ ObjectDBW interface - - def store(self, istream): - return self._loose_db.store(istream) - - def ostream(self): - return self._loose_db.ostream() - - def set_ostream(self, ostream): - return self._loose_db.set_ostream(ostream) - - #} END objectdbw interface - + """A git-style object database, which contains all objects in the 'objects' + subdirectory""" + # Configuration + PackDBCls = PackedDB + LooseDBCls = LooseObjectDB + ReferenceDBCls = ReferenceDB + + # Directories + packs_dir = 'pack' + loose_dir = '' + alternates_dir = os.path.join('info', 'alternates') + + def __init__(self, root_path): + """Initialize ourselves on a git objects directory""" + super(GitDB, self).__init__(root_path) + + def _set_cache_(self, attr): + if attr == '_dbs' or attr == '_loose_db': + self._dbs = list() + loose_db = None + for subpath, dbcls in ((self.packs_dir, self.PackDBCls), + (self.loose_dir, self.LooseDBCls), + (self.alternates_dir, self.ReferenceDBCls)): + path = self.db_path(subpath) + if os.path.exists(path): + self._dbs.append(dbcls(path)) + if dbcls is self.LooseDBCls: + loose_db = self._dbs[-1] + # END remember loose db + # END check path exists + # END for each db type + + # should have at least one subdb + if not self._dbs: + raise InvalidDBRoot(self.root_path()) + # END handle error + + # we the first one should have the store method + assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + + # finally set the value + self._loose_db = loose_db + else: + super(GitDB, self)._set_cache_(attr) + # END handle attrs + + #{ ObjectDBW interface + + def store(self, istream): + return self._loose_db.store(istream) + + def ostream(self): + return self._loose_db.ostream() + + def set_ostream(self, ostream): + return self._loose_db.set_ostream(ostream) + + #} END objectdbw interface + diff --git a/gitdb/db/loose.py b/gitdb/db/loose.py index 6cd1cef..dc0ea0e 100644 --- a/gitdb/db/loose.py +++ b/gitdb/db/loose.py @@ -3,53 +3,53 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from base import ( - FileDBBase, - ObjectDBR, - ObjectDBW - ) + FileDBBase, + ObjectDBR, + ObjectDBW + ) from gitdb.exc import ( - InvalidDBRoot, - BadObject, - AmbiguousObjectName - ) + InvalidDBRoot, 
+ BadObject, + AmbiguousObjectName + ) from gitdb.stream import ( - DecompressMemMapReader, - FDCompressedSha1Writer, - FDStream, - Sha1Writer - ) + DecompressMemMapReader, + FDCompressedSha1Writer, + FDStream, + Sha1Writer + ) from gitdb.base import ( - OStream, - OInfo - ) + OStream, + OInfo + ) from gitdb.util import ( - file_contents_ro_filepath, - ENOENT, - hex_to_bin, - bin_to_hex, - exists, - chmod, - isdir, - isfile, - remove, - mkdir, - rename, - dirname, - basename, - join - ) + file_contents_ro_filepath, + ENOENT, + hex_to_bin, + bin_to_hex, + exists, + chmod, + isdir, + isfile, + remove, + mkdir, + rename, + dirname, + basename, + join + ) from gitdb.fun import ( - chunk_size, - loose_object_header_info, - write_object, - stream_copy - ) + chunk_size, + loose_object_header_info, + write_object, + stream_copy + ) import tempfile import mmap @@ -61,202 +61,202 @@ __all__ = ( 'LooseObjectDB', ) class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): - """A database which operates on loose object files""" - - # CONFIGURATION - # chunks in which data will be copied between streams - stream_chunk_size = chunk_size - - # On windows we need to keep it writable, otherwise it cannot be removed - # either - new_objects_mode = 0444 - if os.name == 'nt': - new_objects_mode = 0644 - - - def __init__(self, root_path): - super(LooseObjectDB, self).__init__(root_path) - self._hexsha_to_file = dict() - # Additional Flags - might be set to 0 after the first failure - # Depending on the root, this might work for some mounts, for others not, which - # is why it is per instance - self._fd_open_flags = getattr(os, 'O_NOATIME', 0) - - #{ Interface - def object_path(self, hexsha): - """ - :return: path at which the object with the given hexsha would be stored, - relative to the database root""" - return join(hexsha[:2], hexsha[2:]) - - def readable_db_object_path(self, hexsha): - """ - :return: readable object path to the object identified by hexsha - :raise BadObject: If the object file does not exist""" - try: - return self._hexsha_to_file[hexsha] - except KeyError: - pass - # END ignore cache misses - - # try filesystem - path = self.db_path(self.object_path(hexsha)) - if exists(path): - self._hexsha_to_file[hexsha] = path - return path - # END handle cache - raise BadObject(hexsha) - - def partial_to_complete_sha_hex(self, partial_hexsha): - """:return: 20 byte binary sha1 string which matches the given name uniquely - :param name: hexadecimal partial name - :raise AmbiguousObjectName: - :raise BadObject: """ - candidate = None - for binsha in self.sha_iter(): - if bin_to_hex(binsha).startswith(partial_hexsha): - # it can't ever find the same object twice - if candidate is not None: - raise AmbiguousObjectName(partial_hexsha) - candidate = binsha - # END for each object - if candidate is None: - raise BadObject(partial_hexsha) - return candidate - - #} END interface - - def _map_loose_object(self, sha): - """ - :return: memory map of that file to allow random read access - :raise BadObject: if object could not be located""" - db_path = self.db_path(self.object_path(bin_to_hex(sha))) - try: - return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) - except OSError,e: - if e.errno != ENOENT: - # try again without noatime - try: - return file_contents_ro_filepath(db_path) - except OSError: - raise BadObject(sha) - # didn't work because of our flag, don't try it again - self._fd_open_flags = 0 - else: - raise BadObject(sha) - # END handle error - # END exception handling - try: - return 
mmap.mmap(fd, 0, access=mmap.ACCESS_READ) - finally: - os.close(fd) - # END assure file is closed - - def set_ostream(self, stream): - """:raise TypeError: if the stream does not support the Sha1Writer interface""" - if stream is not None and not isinstance(stream, Sha1Writer): - raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) - return super(LooseObjectDB, self).set_ostream(stream) - - def info(self, sha): - m = self._map_loose_object(sha) - try: - type, size = loose_object_header_info(m) - return OInfo(sha, type, size) - finally: - m.close() - # END assure release of system resources - - def stream(self, sha): - m = self._map_loose_object(sha) - type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) - return OStream(sha, type, size, stream) - - def has_object(self, sha): - try: - self.readable_db_object_path(bin_to_hex(sha)) - return True - except BadObject: - return False - # END check existance - - def store(self, istream): - """note: The sha we produce will be hex by nature""" - tmp_path = None - writer = self.ostream() - if writer is None: - # open a tmp file to write the data to - fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) - - if istream.binsha is None: - writer = FDCompressedSha1Writer(fd) - else: - writer = FDStream(fd) - # END handle direct stream copies - # END handle custom writer - - try: - try: - if istream.binsha is not None: - # copy as much as possible, the actual uncompressed item size might - # be smaller than the compressed version - stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size) - else: - # write object with header, we have to make a new one - write_object(istream.type, istream.size, istream.read, writer.write, - chunk_size=self.stream_chunk_size) - # END handle direct stream copies - finally: - if tmp_path: - writer.close() - # END assure target stream is closed - except: - if tmp_path: - os.remove(tmp_path) - raise - # END assure tmpfile removal on error - - hexsha = None - if istream.binsha: - hexsha = istream.hexsha - else: - hexsha = writer.sha(as_hex=True) - # END handle sha - - if tmp_path: - obj_path = self.db_path(self.object_path(hexsha)) - obj_dir = dirname(obj_path) - if not isdir(obj_dir): - mkdir(obj_dir) - # END handle destination directory - # rename onto existing doesn't work on windows - if os.name == 'nt' and isfile(obj_path): - remove(obj_path) - # END handle win322 - rename(tmp_path, obj_path) - - # make sure its readable for all ! 
It started out as rw-- tmp file - # but needs to be rwrr - chmod(obj_path, self.new_objects_mode) - # END handle dry_run - - istream.binsha = hex_to_bin(hexsha) - return istream - - def sha_iter(self): - # find all files which look like an object, extract sha from there - for root, dirs, files in os.walk(self.root_path()): - root_base = basename(root) - if len(root_base) != 2: - continue - - for f in files: - if len(f) != 38: - continue - yield hex_to_bin(root_base + f) - # END for each file - # END for each walk iteration - - def size(self): - return len(tuple(self.sha_iter())) - + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + # On windows we need to keep it writable, otherwise it cannot be removed + # either + new_objects_mode = 0444 + if os.name == 'nt': + new_objects_mode = 0644 + + + def __init__(self, root_path): + super(LooseObjectDB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: 20 byte binary sha1 string which matches the given name uniquely + :param name: hexadecimal partial name + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for binsha in self.sha_iter(): + if bin_to_hex(binsha).startswith(partial_hexsha): + # it can't ever find the same object twice + if candidate is not None: + raise AmbiguousObjectName(partial_hexsha) + candidate = binsha + # END for each object + if candidate is None: + raise BadObject(partial_hexsha) + return candidate + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(bin_to_hex(sha))) + try: + return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + return file_contents_ro_filepath(db_path) + except OSError: + raise BadObject(sha) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise BadObject(sha) + # END handle error + # END exception handling + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + finally: + os.close(fd) + # END assure file is closed + + def set_ostream(self, stream): + """:raise TypeError: if the stream does not support the Sha1Writer interface""" + if stream is not None and not isinstance(stream, Sha1Writer): + raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) + return 
super(LooseObjectDB, self).set_ostream(stream) + + def info(self, sha): + m = self._map_loose_object(sha) + try: + type, size = loose_object_header_info(m) + return OInfo(sha, type, size) + finally: + m.close() + # END assure release of system resources + + def stream(self, sha): + m = self._map_loose_object(sha) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) + return OStream(sha, type, size, stream) + + def has_object(self, sha): + try: + self.readable_db_object_path(bin_to_hex(sha)) + return True + except BadObject: + return False + # END check existance + + def store(self, istream): + """note: The sha we produce will be hex by nature""" + tmp_path = None + writer = self.ostream() + if writer is None: + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + + if istream.binsha is None: + writer = FDCompressedSha1Writer(fd) + else: + writer = FDStream(fd) + # END handle direct stream copies + # END handle custom writer + + try: + try: + if istream.binsha is not None: + # copy as much as possible, the actual uncompressed item size might + # be smaller than the compressed version + stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size) + else: + # write object with header, we have to make a new one + write_object(istream.type, istream.size, istream.read, writer.write, + chunk_size=self.stream_chunk_size) + # END handle direct stream copies + finally: + if tmp_path: + writer.close() + # END assure target stream is closed + except: + if tmp_path: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + + hexsha = None + if istream.binsha: + hexsha = istream.hexsha + else: + hexsha = writer.sha(as_hex=True) + # END handle sha + + if tmp_path: + obj_path = self.db_path(self.object_path(hexsha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + # rename onto existing doesn't work on windows + if os.name == 'nt' and isfile(obj_path): + remove(obj_path) + # END handle win322 + rename(tmp_path, obj_path) + + # make sure its readable for all ! It started out as rw-- tmp file + # but needs to be rwrr + chmod(obj_path, self.new_objects_mode) + # END handle dry_run + + istream.binsha = hex_to_bin(hexsha) + return istream + + def sha_iter(self): + # find all files which look like an object, extract sha from there + for root, dirs, files in os.walk(self.root_path()): + root_base = basename(root) + if len(root_base) != 2: + continue + + for f in files: + if len(f) != 38: + continue + yield hex_to_bin(root_base + f) + # END for each file + # END for each walk iteration + + def size(self): + return len(tuple(self.sha_iter())) + diff --git a/gitdb/db/mem.py b/gitdb/db/mem.py index 5d76c83..b9b2b89 100644 --- a/gitdb/db/mem.py +++ b/gitdb/db/mem.py @@ -5,109 +5,109 @@ """Contains the MemoryDatabase implementation""" from loose import LooseObjectDB from base import ( - ObjectDBR, - ObjectDBW - ) + ObjectDBR, + ObjectDBW + ) from gitdb.base import ( - OStream, - IStream, - ) + OStream, + IStream, + ) from gitdb.exc import ( - BadObject, - UnsupportedOperation - ) + BadObject, + UnsupportedOperation + ) from gitdb.stream import ( - ZippedStoreShaWriter, - DecompressMemMapReader, - ) + ZippedStoreShaWriter, + DecompressMemMapReader, + ) from cStringIO import StringIO __all__ = ("MemoryDB", ) class MemoryDB(ObjectDBR, ObjectDBW): - """A memory database stores everything to memory, providing fast IO and object - retrieval. 
It should be used to buffer results and obtain SHAs before writing - it to the actual physical storage, as it allows to query whether object already - exists in the target storage before introducing actual IO - - **Note:** memory is currently not threadsafe, hence the async methods cannot be used - for storing""" - - def __init__(self): - super(MemoryDB, self).__init__() - self._db = LooseObjectDB("path/doesnt/matter") - - # maps 20 byte shas to their OStream objects - self._cache = dict() - - def set_ostream(self, stream): - raise UnsupportedOperation("MemoryDB's always stream into memory") - - def store(self, istream): - zstream = ZippedStoreShaWriter() - self._db.set_ostream(zstream) - - istream = self._db.store(istream) - zstream.close() # close to flush - zstream.seek(0) - - # don't provide a size, the stream is written in object format, hence the - # header needs decompression - decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False) - self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream) - - return istream - - def store_async(self, reader): - raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access") - - def has_object(self, sha): - return sha in self._cache + """A memory database stores everything to memory, providing fast IO and object + retrieval. It should be used to buffer results and obtain SHAs before writing + it to the actual physical storage, as it allows to query whether object already + exists in the target storage before introducing actual IO + + **Note:** memory is currently not threadsafe, hence the async methods cannot be used + for storing""" + + def __init__(self): + super(MemoryDB, self).__init__() + self._db = LooseObjectDB("path/doesnt/matter") + + # maps 20 byte shas to their OStream objects + self._cache = dict() + + def set_ostream(self, stream): + raise UnsupportedOperation("MemoryDB's always stream into memory") + + def store(self, istream): + zstream = ZippedStoreShaWriter() + self._db.set_ostream(zstream) + + istream = self._db.store(istream) + zstream.close() # close to flush + zstream.seek(0) + + # don't provide a size, the stream is written in object format, hence the + # header needs decompression + decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False) + self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream) + + return istream + + def store_async(self, reader): + raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access") + + def has_object(self, sha): + return sha in self._cache - def info(self, sha): - # we always return streams, which are infos as well - return self.stream(sha) - - def stream(self, sha): - try: - ostream = self._cache[sha] - # rewind stream for the next one to read - ostream.stream.seek(0) - return ostream - except KeyError: - raise BadObject(sha) - # END exception handling - - def size(self): - return len(self._cache) - - def sha_iter(self): - return self._cache.iterkeys() - - - #{ Interface - def stream_copy(self, sha_iter, odb): - """Copy the streams as identified by sha's yielded by sha_iter into the given odb - The streams will be copied directly - **Note:** the object will only be written if it did not exist in the target db - :return: amount of streams actually copied into odb. 
If smaller than the amount - of input shas, one or more objects did already exist in odb""" - count = 0 - for sha in sha_iter: - if odb.has_object(sha): - continue - # END check object existance - - ostream = self.stream(sha) - # compressed data including header - sio = StringIO(ostream.stream.data()) - istream = IStream(ostream.type, ostream.size, sio, sha) - - odb.store(istream) - count += 1 - # END for each sha - return count - #} END interface + def info(self, sha): + # we always return streams, which are infos as well + return self.stream(sha) + + def stream(self, sha): + try: + ostream = self._cache[sha] + # rewind stream for the next one to read + ostream.stream.seek(0) + return ostream + except KeyError: + raise BadObject(sha) + # END exception handling + + def size(self): + return len(self._cache) + + def sha_iter(self): + return self._cache.iterkeys() + + + #{ Interface + def stream_copy(self, sha_iter, odb): + """Copy the streams as identified by sha's yielded by sha_iter into the given odb + The streams will be copied directly + **Note:** the object will only be written if it did not exist in the target db + :return: amount of streams actually copied into odb. If smaller than the amount + of input shas, one or more objects did already exist in odb""" + count = 0 + for sha in sha_iter: + if odb.has_object(sha): + continue + # END check object existance + + ostream = self.stream(sha) + # compressed data including header + sio = StringIO(ostream.stream.data()) + istream = IStream(ostream.type, ostream.size, sio, sha) + + odb.store(istream) + count += 1 + # END for each sha + return count + #} END interface diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py index 4c9d0b9..9287319 100644 --- a/gitdb/db/pack.py +++ b/gitdb/db/pack.py @@ -4,18 +4,18 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module containing a database to deal with packs""" from base import ( - FileDBBase, - ObjectDBR, - CachingDB - ) + FileDBBase, + ObjectDBR, + CachingDB + ) from gitdb.util import LazyMixin from gitdb.exc import ( - BadObject, - UnsupportedOperation, - AmbiguousObjectName - ) + BadObject, + UnsupportedOperation, + AmbiguousObjectName + ) from gitdb.pack import PackEntity @@ -28,182 +28,182 @@ __all__ = ('PackedDB', ) class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): - """A database operating on a set of object packs""" - - # sort the priority list every N queries - # Higher values are better, performance tests don't show this has - # any effect, but it should have one - _sort_interval = 500 - - def __init__(self, root_path): - super(PackedDB, self).__init__(root_path) - # list of lists with three items: - # * hits - number of times the pack was hit with a request - # * entity - Pack entity instance - # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query - # self._entities = list() # lazy loaded list - self._hit_count = 0 # amount of hits - self._st_mtime = 0 # last modification data of our root path - - def _set_cache_(self, attr): - if attr == '_entities': - self._entities = list() - self.update_cache(force=True) - # END handle entities initialization - - def _sort_entities(self): - self._entities.sort(key=lambda l: l[0], reverse=True) - - def _pack_info(self, sha): - """:return: tuple(entity, index) for an item at the given sha - :param sha: 20 or 40 byte sha - :raise BadObject: - **Note:** This method is not thread-safe, but may be hit in multi-threaded - operation. 
The worst thing that can happen though is a counter that - was not incremented, or the list being in wrong order. So we safe - the time for locking here, lets see how that goes""" - # presort ? - if self._hit_count % self._sort_interval == 0: - self._sort_entities() - # END update sorting - - for item in self._entities: - index = item[2](sha) - if index is not None: - item[0] += 1 # one hit for you - self._hit_count += 1 # general hit count - return (item[1], index) - # END index found in pack - # END for each item - - # no hit, see whether we have to update packs - # NOTE: considering packs don't change very often, we safe this call - # and leave it to the super-caller to trigger that - raise BadObject(sha) - - #{ Object DB Read - - def has_object(self, sha): - try: - self._pack_info(sha) - return True - except BadObject: - return False - # END exception handling - - def info(self, sha): - entity, index = self._pack_info(sha) - return entity.info_at_index(index) - - def stream(self, sha): - entity, index = self._pack_info(sha) - return entity.stream_at_index(index) - - def sha_iter(self): - sha_list = list() - for entity in self.entities(): - index = entity.index() - sha_by_index = index.sha - for index in xrange(index.size()): - yield sha_by_index(index) - # END for each index - # END for each entity - - def size(self): - sizes = [item[1].index().size() for item in self._entities] - return reduce(lambda x,y: x+y, sizes, 0) - - #} END object db read - - #{ object db write - - def store(self, istream): - """Storing individual objects is not feasible as a pack is designed to - hold multiple objects. Writing or rewriting packs for single objects is - inefficient""" - raise UnsupportedOperation() - - def store_async(self, reader): - # TODO: add ObjectDBRW before implementing this - raise NotImplementedError() - - #} END object db write - - - #{ Interface - - def update_cache(self, force=False): - """ - Update our cache with the acutally existing packs on disk. Add new ones, - and remove deleted ones. We keep the unchanged ones - - :param force: If True, the cache will be updated even though the directory - does not appear to have changed according to its modification timestamp. - :return: True if the packs have been updated so there is new information, - False if there was no change to the pack database""" - stat = os.stat(self.root_path()) - if not force and stat.st_mtime <= self._st_mtime: - return False - # END abort early on no change - self._st_mtime = stat.st_mtime - - # packs are supposed to be prefixed with pack- by git-convention - # get all pack files, figure out what changed - pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) - our_pack_files = set(item[1].pack().path() for item in self._entities) - - # new packs - for pack_file in (pack_files - our_pack_files): - # init the hit-counter/priority with the size, a good measure for hit- - # probability. 
Its implemented so that only 12 bytes will be read - entity = PackEntity(pack_file) - self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) - # END for each new packfile - - # removed packs - for pack_file in (our_pack_files - pack_files): - del_index = -1 - for i, item in enumerate(self._entities): - if item[1].pack().path() == pack_file: - del_index = i - break - # END found index - # END for each entity - assert del_index != -1 - del(self._entities[del_index]) - # END for each removed pack - - # reinitialize prioritiess - self._sort_entities() - return True - - def entities(self): - """:return: list of pack entities operated upon by this database""" - return [ item[1] for item in self._entities ] - - def partial_to_complete_sha(self, partial_binsha, canonical_length): - """:return: 20 byte sha as inferred by the given partial binary sha - :param partial_binsha: binary sha with less than 20 bytes - :param canonical_length: length of the corresponding canonical representation. - It is required as binary sha's cannot display whether the original hex sha - had an odd or even number of characters - :raise AmbiguousObjectName: - :raise BadObject: """ - candidate = None - for item in self._entities: - item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length) - if item_index is not None: - sha = item[1].index().sha(item_index) - if candidate and candidate != sha: - raise AmbiguousObjectName(partial_binsha) - candidate = sha - # END handle full sha could be found - # END for each entity - - if candidate: - return candidate - - # still not found ? - raise BadObject(partial_binsha) - - #} END interface + """A database operating on a set of object packs""" + + # sort the priority list every N queries + # Higher values are better, performance tests don't show this has + # any effect, but it should have one + _sort_interval = 500 + + def __init__(self, root_path): + super(PackedDB, self).__init__(root_path) + # list of lists with three items: + # * hits - number of times the pack was hit with a request + # * entity - Pack entity instance + # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query + # self._entities = list() # lazy loaded list + self._hit_count = 0 # amount of hits + self._st_mtime = 0 # last modification data of our root path + + def _set_cache_(self, attr): + if attr == '_entities': + self._entities = list() + self.update_cache(force=True) + # END handle entities initialization + + def _sort_entities(self): + self._entities.sort(key=lambda l: l[0], reverse=True) + + def _pack_info(self, sha): + """:return: tuple(entity, index) for an item at the given sha + :param sha: 20 or 40 byte sha + :raise BadObject: + **Note:** This method is not thread-safe, but may be hit in multi-threaded + operation. The worst thing that can happen though is a counter that + was not incremented, or the list being in wrong order. So we safe + the time for locking here, lets see how that goes""" + # presort ? 
+ if self._hit_count % self._sort_interval == 0: + self._sort_entities() + # END update sorting + + for item in self._entities: + index = item[2](sha) + if index is not None: + item[0] += 1 # one hit for you + self._hit_count += 1 # general hit count + return (item[1], index) + # END index found in pack + # END for each item + + # no hit, see whether we have to update packs + # NOTE: considering packs don't change very often, we safe this call + # and leave it to the super-caller to trigger that + raise BadObject(sha) + + #{ Object DB Read + + def has_object(self, sha): + try: + self._pack_info(sha) + return True + except BadObject: + return False + # END exception handling + + def info(self, sha): + entity, index = self._pack_info(sha) + return entity.info_at_index(index) + + def stream(self, sha): + entity, index = self._pack_info(sha) + return entity.stream_at_index(index) + + def sha_iter(self): + sha_list = list() + for entity in self.entities(): + index = entity.index() + sha_by_index = index.sha + for index in xrange(index.size()): + yield sha_by_index(index) + # END for each index + # END for each entity + + def size(self): + sizes = [item[1].index().size() for item in self._entities] + return reduce(lambda x,y: x+y, sizes, 0) + + #} END object db read + + #{ object db write + + def store(self, istream): + """Storing individual objects is not feasible as a pack is designed to + hold multiple objects. Writing or rewriting packs for single objects is + inefficient""" + raise UnsupportedOperation() + + def store_async(self, reader): + # TODO: add ObjectDBRW before implementing this + raise NotImplementedError() + + #} END object db write + + + #{ Interface + + def update_cache(self, force=False): + """ + Update our cache with the acutally existing packs on disk. Add new ones, + and remove deleted ones. We keep the unchanged ones + + :param force: If True, the cache will be updated even though the directory + does not appear to have changed according to its modification timestamp. + :return: True if the packs have been updated so there is new information, + False if there was no change to the pack database""" + stat = os.stat(self.root_path()) + if not force and stat.st_mtime <= self._st_mtime: + return False + # END abort early on no change + self._st_mtime = stat.st_mtime + + # packs are supposed to be prefixed with pack- by git-convention + # get all pack files, figure out what changed + pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) + our_pack_files = set(item[1].pack().path() for item in self._entities) + + # new packs + for pack_file in (pack_files - our_pack_files): + # init the hit-counter/priority with the size, a good measure for hit- + # probability. 
Its implemented so that only 12 bytes will be read + entity = PackEntity(pack_file) + self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) + # END for each new packfile + + # removed packs + for pack_file in (our_pack_files - pack_files): + del_index = -1 + for i, item in enumerate(self._entities): + if item[1].pack().path() == pack_file: + del_index = i + break + # END found index + # END for each entity + assert del_index != -1 + del(self._entities[del_index]) + # END for each removed pack + + # reinitialize prioritiess + self._sort_entities() + return True + + def entities(self): + """:return: list of pack entities operated upon by this database""" + return [ item[1] for item in self._entities ] + + def partial_to_complete_sha(self, partial_binsha, canonical_length): + """:return: 20 byte sha as inferred by the given partial binary sha + :param partial_binsha: binary sha with less than 20 bytes + :param canonical_length: length of the corresponding canonical representation. + It is required as binary sha's cannot display whether the original hex sha + had an odd or even number of characters + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for item in self._entities: + item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length) + if item_index is not None: + sha = item[1].index().sha(item_index) + if candidate and candidate != sha: + raise AmbiguousObjectName(partial_binsha) + candidate = sha + # END handle full sha could be found + # END for each entity + + if candidate: + return candidate + + # still not found ? + raise BadObject(partial_binsha) + + #} END interface diff --git a/gitdb/db/ref.py b/gitdb/db/ref.py index 8989843..60004a7 100644 --- a/gitdb/db/ref.py +++ b/gitdb/db/ref.py @@ -3,77 +3,77 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from base import ( - CompoundDB, - ) + CompoundDB, + ) import os __all__ = ('ReferenceDB', ) class ReferenceDB(CompoundDB): - """A database consisting of database referred to in a file""" - - # Configuration - # Specifies the object database to use for the paths found in the alternates - # file. 
If None, it defaults to the GitDB - ObjectDBCls = None - - def __init__(self, ref_file): - super(ReferenceDB, self).__init__() - self._ref_file = ref_file - - def _set_cache_(self, attr): - if attr == '_dbs': - self._dbs = list() - self._update_dbs_from_ref_file() - else: - super(ReferenceDB, self)._set_cache_(attr) - # END handle attrs - - def _update_dbs_from_ref_file(self): - dbcls = self.ObjectDBCls - if dbcls is None: - # late import - from git import GitDB - dbcls = GitDB - # END get db type - - # try to get as many as possible, don't fail if some are unavailable - ref_paths = list() - try: - ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()] - except (OSError, IOError): - pass - # END handle alternates - - ref_paths_set = set(ref_paths) - cur_ref_paths_set = set(db.root_path() for db in self._dbs) - - # remove existing - for path in (cur_ref_paths_set - ref_paths_set): - for i, db in enumerate(self._dbs[:]): - if db.root_path() == path: - del(self._dbs[i]) - continue - # END del matching db - # END for each path to remove - - # add new - # sort them to maintain order - added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p)) - for path in added_paths: - try: - db = dbcls(path) - # force an update to verify path - if isinstance(db, CompoundDB): - db.databases() - # END verification - self._dbs.append(db) - except Exception, e: - # ignore invalid paths or issues - pass - # END for each path to add - - def update_cache(self, force=False): - # re-read alternates and update databases - self._update_dbs_from_ref_file() - return super(ReferenceDB, self).update_cache(force) + """A database consisting of database referred to in a file""" + + # Configuration + # Specifies the object database to use for the paths found in the alternates + # file. If None, it defaults to the GitDB + ObjectDBCls = None + + def __init__(self, ref_file): + super(ReferenceDB, self).__init__() + self._ref_file = ref_file + + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + self._update_dbs_from_ref_file() + else: + super(ReferenceDB, self)._set_cache_(attr) + # END handle attrs + + def _update_dbs_from_ref_file(self): + dbcls = self.ObjectDBCls + if dbcls is None: + # late import + from git import GitDB + dbcls = GitDB + # END get db type + + # try to get as many as possible, don't fail if some are unavailable + ref_paths = list() + try: + ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()] + except (OSError, IOError): + pass + # END handle alternates + + ref_paths_set = set(ref_paths) + cur_ref_paths_set = set(db.root_path() for db in self._dbs) + + # remove existing + for path in (cur_ref_paths_set - ref_paths_set): + for i, db in enumerate(self._dbs[:]): + if db.root_path() == path: + del(self._dbs[i]) + continue + # END del matching db + # END for each path to remove + + # add new + # sort them to maintain order + added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p)) + for path in added_paths: + try: + db = dbcls(path) + # force an update to verify path + if isinstance(db, CompoundDB): + db.databases() + # END verification + self._dbs.append(db) + except Exception, e: + # ignore invalid paths or issues + pass + # END for each path to add + + def update_cache(self, force=False): + # re-read alternates and update databases + self._update_dbs_from_ref_file() + return super(ReferenceDB, self).update_cache(force) |
