Diffstat (limited to 'gitdb/db')
-rw-r--r--  gitdb/db/__init__.py |  13
-rw-r--r--  gitdb/db/base.py     | 187
-rw-r--r--  gitdb/db/git.py      |  53
-rw-r--r--  gitdb/db/loose.py    | 150
-rw-r--r--  gitdb/db/mem.py      |  82
-rw-r--r--  gitdb/db/pack.py     | 104
-rw-r--r--  gitdb/db/ref.py      |  29
7 files changed, 273 insertions(+), 345 deletions(-)
diff --git a/gitdb/db/__init__.py b/gitdb/db/__init__.py
index e5935b7..0a2a46a 100644
--- a/gitdb/db/__init__.py
+++ b/gitdb/db/__init__.py
@@ -3,10 +3,9 @@
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
-from base import *
-from loose import *
-from mem import *
-from pack import *
-from git import *
-from ref import *
-
+from gitdb.db.base import *
+from gitdb.db.loose import *
+from gitdb.db.mem import *
+from gitdb.db.pack import *
+from gitdb.db.git import *
+from gitdb.db.ref import *
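The __init__.py change above is the pattern repeated throughout this commit: Python 2's implicit relative imports are replaced with absolute ones, because Python 3 only resolves bare module names against sys.path. A minimal sketch of the failure mode, using a hypothetical package layout (pkg/a.py and pkg/b.py, not part of gitdb):

    # pkg/a.py -- sketch; assumes a sibling module pkg/b.py defining VALUE = 1
    from __future__ import absolute_import   # gives Python 3 semantics on Python 2

    # from b import VALUE        # implicit relative import: works on py2 only
    from pkg.b import VALUE      # absolute import, the spelling this commit adopts
    # from .b import VALUE       # explicit relative import would also be portable

    print(VALUE)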
diff --git a/gitdb/db/base.py b/gitdb/db/base.py
index 867e93a..a670eea 100644
--- a/gitdb/db/base.py
+++ b/gitdb/db/base.py
@@ -4,22 +4,20 @@
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Contains implementations of database retrieving objects"""
 from gitdb.util import (
-    pool,
-    join,
-    LazyMixin,
-    hex_to_bin
-    )
+    join,
+    LazyMixin,
+    hex_to_bin
+)
+from gitdb.utils.encoding import force_text
 from gitdb.exc import (
-    BadObject,
-    AmbiguousObjectName
-    )
-
-from async import (
-    ChannelThreadTask
-    )
+    BadObject,
+    AmbiguousObjectName
+)
 
 from itertools import chain
+from functools import reduce
+
 
 __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
@@ -28,80 +26,51 @@ __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
 class ObjectDBR(object):
     """Defines an interface for object database lookup.
     Objects are identified by their 20 byte bin sha"""
-
+
     def __contains__(self, sha):
         return self.has_object(sha)
-
-    #{ Query Interface
+
+    #{ Query Interface
     def has_object(self, sha):
         """
         :return: True if the object identified by the given 20 bytes
             binary sha is contained in the database"""
         raise NotImplementedError("To be implemented in subclass")
-
-    def has_object_async(self, reader):
-        """Return a reader yielding information about the membership of objects
-        as identified by shas
-        :param reader: Reader yielding 20 byte shas.
-        :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
-            whether the given sha exists in the database or not"""
-        task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
-        return pool.add_task(task)
-
+
     def info(self, sha):
         """ :return: OInfo instance
         :param sha: bytes binary sha
         :raise BadObject:"""
         raise NotImplementedError("To be implemented in subclass")
-
-    def info_async(self, reader):
-        """Retrieve information of a multitude of objects asynchronously
-        :param reader: Channel yielding the sha's of the objects of interest
-        :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
-        task = ChannelThreadTask(reader, str(self.info_async), self.info)
-        return pool.add_task(task)
-
+
     def stream(self, sha):
         """:return: OStream instance
         :param sha: 20 bytes binary sha
         :raise BadObject:"""
         raise NotImplementedError("To be implemented in subclass")
-
-    def stream_async(self, reader):
-        """Retrieve the OStream of multiple objects
-        :param reader: see ``info``
-        :param max_threads: see ``ObjectDBW.store``
-        :return: async.Reader yielding OStream|InvalidOStream instances in any order
-
-        **Note:** depending on the system configuration, it might not be possible to
-            read all OStreams at once. Instead, read them individually using reader.read(x)
-            where x is small enough."""
-        # base implementation just uses the stream method repeatedly
-        task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
-        return pool.add_task(task)
-
+
     def size(self):
         """:return: amount of objects in this database"""
         raise NotImplementedError()
-
+
     def sha_iter(self):
         """Return iterator yielding 20 byte shas for all objects in this data base"""
         raise NotImplementedError()
-
+
     #} END query interface
-
-
+
+
 class ObjectDBW(object):
     """Defines an interface to create objects in the database"""
-
+
     def __init__(self, *args, **kwargs):
         self._ostream = None
-
+
     #{ Edit Interface
     def set_ostream(self, stream):
         """
         Adjusts the stream to which all data should be sent when storing new objects
-
+
         :param stream: if not None, the stream to use, if None the default stream
             will be used.
         :return: previously installed stream, or None if there was no override
@@ -109,99 +78,77 @@ class ObjectDBW(object):
         cstream = self._ostream
         self._ostream = stream
         return cstream
-
+
     def ostream(self):
         """
         :return: overridden output stream this instance will write to, or None
            if it will write to the default stream"""
         return self._ostream
-
+
     def store(self, istream):
         """
         Create a new object in the database
         :return: the input istream object with its sha set to its corresponding value
-
-        :param istream: IStream compatible instance. If its sha is already set
-            to a value, the object will just be stored in our database format,
+
+        :param istream: IStream compatible instance. If its sha is already set
+            to a value, the object will just be stored in our database format,
            in which case the input stream is expected to be in object format
            ( header + contents ).
         :raise IOError: if data could not be written"""
         raise NotImplementedError("To be implemented in subclass")
-
-    def store_async(self, reader):
-        """
-        Create multiple new objects in the database asynchronously. The method will
-        return right away, returning an output channel which receives the results as
-        they are computed.
-
-        :return: Channel yielding your IStream which served as input, in any order.
-            The IStreams sha will be set to the sha it received during the process,
-            or its error attribute will be set to the exception informing about the error.
-
-        :param reader: async.Reader yielding IStream instances.
-            The same instances will be used in the output channel as were received
-            in by the Reader.
-
-        **Note:** As some ODB implementations implement this operation atomically, they might
-            abort the whole operation if one item could not be processed. Hence check how
-            many items have actually been produced."""
-        # base implementation uses store to perform the work
-        task = ChannelThreadTask(reader, str(self.store_async), self.store)
-        return pool.add_task(task)
-
+
     #} END edit interface
-
+
 
 class FileDBBase(object):
-    """Provides basic facilities to retrieve files of interest, including
+    """Provides basic facilities to retrieve files of interest, including
     caching facilities to help mapping hexsha's to objects"""
-
+
     def __init__(self, root_path):
         """Initialize this instance to look for its files at the given root path
         All subsequent operations will be relative to this path
        :raise InvalidDBRoot:
        **Note:** The base will not perform any accessibility checking as the base
-        might not yet be accessible, but become accessible before the first
+        might not yet be accessible, but become accessible before the first
            access."""
         super(FileDBBase, self).__init__()
         self._root_path = root_path
-
-
-    #{ Interface
+
+
+    #{ Interface
     def root_path(self):
         """:return: path at which this db operates"""
         return self._root_path
-
+
     def db_path(self, rela_path):
         """
-        :return: the given relative path relative to our database root, allowing
+        :return: the given relative path relative to our database root, allowing
            to potentially access datafiles"""
-        return join(self._root_path, rela_path)
+        return join(self._root_path, force_text(rela_path))
     #} END interface
-
+
 
 class CachingDB(object):
     """A database which uses caches to speed-up access"""
-
-    #{ Interface
+
+    #{ Interface
     def update_cache(self, force=False):
         """
         Call this method if the underlying data changed to trigger an update
        of the internal caching structures.
-
+
         :param force: if True, the update must be performed. Otherwise the implementation
            may decide not to perform an update if it thinks nothing has changed.
        :return: True if an update was performed as something changed indeed"""
-
+
     # END interface


 def _databases_recursive(database, output):
-    """Fill output list with database from db, in order. Deals with Loose, Packed
+    """Fill output list with database from db, in order. Deals with Loose, Packed
     and compound databases."""
     if isinstance(database, CompoundDB):
-        compounds = list()
         dbs = database.databases()
         output.extend(db for db in dbs if not isinstance(db, CompoundDB))
         for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
@@ -209,11 +156,11 @@ def _databases_recursive(database, output):
     else:
         output.append(database)
     # END handle database type
-
+

 class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
     """A database which delegates calls to sub-databases.
-
+
     Databases are stored in the lazy-loaded _dbs attribute.
     Define _set_cache_ to update it with your databases"""
     def _set_cache_(self, attr):
@@ -223,27 +170,27 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
             self._db_cache = dict()
         else:
             super(CompoundDB, self)._set_cache_(attr)
-
+
     def _db_query(self, sha):
         """:return: database containing the given 20 byte sha
         :raise BadObject:"""
-        # most databases use binary representations, prevent converting
+        # most databases use binary representations, prevent converting
         # it every time a database is being queried
         try:
             return self._db_cache[sha]
         except KeyError:
             pass
         # END first level cache
-
+
         for db in self._dbs:
             if db.has_object(sha):
                 self._db_cache[sha] = db
                 return db
         # END for each database
         raise BadObject(sha)
-
-    #{ ObjectDBR interface
-
+
+    #{ ObjectDBR interface
+
     def has_object(self, sha):
         try:
             self._db_query(sha)
@@ -251,24 +198,24 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
         except BadObject:
             return False
         # END handle exceptions
-
+
     def info(self, sha):
         return self._db_query(sha).info(sha)
-
+
     def stream(self, sha):
         return self._db_query(sha).stream(sha)

     def size(self):
         """:return: total size of all contained databases"""
         return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
-
+
     def sha_iter(self):
         return chain(*(db.sha_iter() for db in self._dbs))
-
+
     #} END object DBR Interface
-
+
     #{ Interface
-
+
     def databases(self):
         """:return: tuple of database instances we use for lookups"""
         return tuple(self._dbs)
@@ -283,22 +230,22 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
             # END if is caching db
         # END for each database to update
         return stat
-
+
     def partial_to_complete_sha_hex(self, partial_hexsha):
         """
-        :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+        :return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
         :param partial_hexsha: hexsha with less than 40 byte
         :raise AmbiguousObjectName: """
         databases = list()
         _databases_recursive(self, databases)
-
+        partial_hexsha = force_text(partial_hexsha)
         len_partial_hexsha = len(partial_hexsha)
         if len_partial_hexsha % 2 != 0:
             partial_binsha = hex_to_bin(partial_hexsha + "0")
         else:
             partial_binsha = hex_to_bin(partial_hexsha)
-        # END assure successful binary conversion
-
+        # END assure successful binary conversion
+
         candidate = None
         for db in databases:
             full_bin_sha = None
@@ -320,7 +267,5 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
         if not candidate:
             raise BadObject(partial_binsha)
         return candidate
-
-    #} END interface
-
+
+    #} END interface
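Two things happen to base.py besides whitespace cleanup: the async-backed bulk methods (has_object_async, info_async, stream_async, store_async) disappear along with the ChannelThreadTask/pool machinery, and reduce is now imported from functools because Python 3 removed the builtin that CompoundDB.size() relies on. Callers of the removed methods can fall back to a plain loop; a sketch with a hypothetical helper name:

    from gitdb.exc import BadObject

    def info_all(db, binshas):
        """Yield (binsha, OInfo or None) pairs, roughly what the old
        info_async() channel produced, but synchronously and in order."""
        for sha in binshas:
            try:
                yield sha, db.info(sha)
            except BadObject:
                yield sha, None   # unknown shas are reported, not fatal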
diff --git a/gitdb/db/git.py b/gitdb/db/git.py
index 1d6ad0f..d22e3f1 100644
--- a/gitdb/db/git.py
+++ b/gitdb/db/git.py
@@ -2,22 +2,18 @@
 #
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
-from base import (
-    CompoundDB,
-    ObjectDBW,
-    FileDBBase
-    )
-
-from loose import LooseObjectDB
-from pack import PackedDB
-from ref import ReferenceDB
-
-from gitdb.util import LazyMixin
-from gitdb.exc import (
-    InvalidDBRoot,
-    BadObject,
-    AmbiguousObjectName
-    )
+from gitdb.db.base import (
+    CompoundDB,
+    ObjectDBW,
+    FileDBBase
+)
+
+from gitdb.db.loose import LooseObjectDB
+from gitdb.db.pack import PackedDB
+from gitdb.db.ref import ReferenceDB
+
+from gitdb.exc import InvalidDBRoot
+
 import os

 __all__ = ('GitDB', )
@@ -30,21 +26,21 @@ class GitDB(FileDBBase, ObjectDBW, CompoundDB):
     PackDBCls = PackedDB
     LooseDBCls = LooseObjectDB
     ReferenceDBCls = ReferenceDB
-
+
     # Directories
     packs_dir = 'pack'
     loose_dir = ''
     alternates_dir = os.path.join('info', 'alternates')
-
+
     def __init__(self, root_path):
         """Initialize ourselves on a git objects directory"""
         super(GitDB, self).__init__(root_path)
-
+
     def _set_cache_(self, attr):
         if attr == '_dbs' or attr == '_loose_db':
             self._dbs = list()
             loose_db = None
-            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
                                    (self.loose_dir, self.LooseDBCls),
                                    (self.alternates_dir, self.ReferenceDBCls)):
                 path = self.db_path(subpath)
@@ -55,31 +51,30 @@ class GitDB(FileDBBase, ObjectDBW, CompoundDB):
                 # END remember loose db
                 # END check path exists
             # END for each db type
-
+
             # should have at least one subdb
             if not self._dbs:
                 raise InvalidDBRoot(self.root_path())
             # END handle error
-
+
             # the first one should have the store method
             assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"
-
+
             # finally set the value
             self._loose_db = loose_db
         else:
             super(GitDB, self)._set_cache_(attr)
         # END handle attrs
-
+
     #{ ObjectDBW interface
-
+
     def store(self, istream):
         return self._loose_db.store(istream)
-
+
     def ostream(self):
         return self._loose_db.ostream()
-
+
     def set_ostream(self, ostream):
         return self._loose_db.set_ostream(ostream)
-
+
     #} END objectdbw interface
-
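GitDB itself is unchanged apart from the import style and the removal of now-unused names (LazyMixin arrives via CompoundDB; BadObject and AmbiguousObjectName were only needed by the dropped async paths). For orientation, a usage sketch of the composed database; the path and sha below are made up:

    from gitdb.db.git import GitDB
    from gitdb.util import hex_to_bin

    db = GitDB("/path/to/repo/.git/objects")   # hypothetical objects directory
    binsha = hex_to_bin("ca1e8bf5e7429677cf6dbd1fba069f433cbf1ae6")  # made-up sha
    if db.has_object(binsha):
        info = db.info(binsha)                 # served by pack, loose or alternate db
        print(info.type, info.size)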
diff --git a/gitdb/db/loose.py b/gitdb/db/loose.py
index dc0ea0e..3743026 100644
--- a/gitdb/db/loose.py
+++ b/gitdb/db/loose.py
@@ -2,58 +2,58 @@
 #
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
-from base import (
-    FileDBBase,
-    ObjectDBR,
-    ObjectDBW
-    )
+from gitdb.db.base import (
+    FileDBBase,
+    ObjectDBR,
+    ObjectDBW
+)

 from gitdb.exc import (
-    InvalidDBRoot,
     BadObject,
     AmbiguousObjectName
-    )
+)

 from gitdb.stream import (
-    DecompressMemMapReader,
-    FDCompressedSha1Writer,
-    FDStream,
-    Sha1Writer
-    )
+    DecompressMemMapReader,
+    FDCompressedSha1Writer,
+    FDStream,
+    Sha1Writer
+)

 from gitdb.base import (
-    OStream,
-    OInfo
-    )
+    OStream,
+    OInfo
+)

 from gitdb.util import (
-    file_contents_ro_filepath,
-    ENOENT,
-    hex_to_bin,
-    bin_to_hex,
-    exists,
-    chmod,
-    isdir,
-    isfile,
-    remove,
-    mkdir,
-    rename,
-    dirname,
-    basename,
-    join
-    )
-
-from gitdb.fun import (
+    file_contents_ro_filepath,
+    ENOENT,
+    hex_to_bin,
+    bin_to_hex,
+    exists,
+    chmod,
+    isdir,
+    isfile,
+    remove,
+    mkdir,
+    rename,
+    dirname,
+    basename,
+    join
+)
+
+from gitdb.fun import (
     chunk_size,
-    loose_object_header_info,
+    loose_object_header_info,
     write_object,
     stream_copy
-    )
+)
+
+from gitdb.utils.compat import MAXSIZE
+from gitdb.utils.encoding import force_bytes

 import tempfile
-import mmap
-import sys
 import os
@@ -62,18 +62,18 @@ __all__ = ( 'LooseObjectDB', )

 class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
     """A database which operates on loose object files"""
-
+
     # CONFIGURATION
     # chunks in which data will be copied between streams
     stream_chunk_size = chunk_size
-
+
     # On windows we need to keep it writable, otherwise it cannot be removed
     # either
-    new_objects_mode = 0444
+    new_objects_mode = int("444", 8)
     if os.name == 'nt':
-        new_objects_mode = 0644
-
-
+        new_objects_mode = int("644", 8)
+
+
     def __init__(self, root_path):
         super(LooseObjectDB, self).__init__(root_path)
         self._hexsha_to_file = dict()
@@ -81,14 +81,14 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
         # Depending on the root, this might work for some mounts, for others not, which
         # is why it is per instance
         self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
-
-    #{ Interface
+
+    #{ Interface
     def object_path(self, hexsha):
         """
-        :return: path at which the object with the given hexsha would be stored,
+        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
         return join(hexsha[:2], hexsha[2:])
-
+
     def readable_db_object_path(self, hexsha):
         """
         :return: readable object path to the object identified by hexsha
@@ -97,8 +97,8 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
             return self._hexsha_to_file[hexsha]
         except KeyError:
             pass
-        # END ignore cache misses
-
+        # END ignore cache misses
+
         # try filesystem
         path = self.db_path(self.object_path(hexsha))
         if exists(path):
@@ -106,15 +106,15 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
             return path
         # END handle cache
         raise BadObject(hexsha)
-
+
     def partial_to_complete_sha_hex(self, partial_hexsha):
         """:return: 20 byte binary sha1 string which matches the given name uniquely
-        :param name: hexadecimal partial name
-        :raise AmbiguousObjectName:
+        :param name: hexadecimal partial name (bytes or ascii string)
+        :raise AmbiguousObjectName:
         :raise BadObject: """
         candidate = None
         for binsha in self.sha_iter():
-            if bin_to_hex(binsha).startswith(partial_hexsha):
+            if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
                 # it can't ever find the same object twice
                 if candidate is not None:
                     raise AmbiguousObjectName(partial_hexsha)
@@ -123,9 +123,9 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
         if candidate is None:
             raise BadObject(partial_hexsha)
         return candidate
-
+
     #} END interface
-
+
     def _map_loose_object(self, sha):
         """
         :return: memory map of that file to allow random read access
@@ -133,7 +133,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
         db_path = self.db_path(self.object_path(bin_to_hex(sha)))
         try:
             return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
-        except OSError,e:
+        except OSError as e:
             if e.errno != ENOENT:
                 # try again without noatime
                 try:
@@ -146,32 +146,27 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
                 raise BadObject(sha)
             # END handle error
         # END exception handling
-        try:
-            return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
-        finally:
-            os.close(fd)
-        # END assure file is closed
-
+
     def set_ostream(self, stream):
         """:raise TypeError: if the stream does not support the Sha1Writer interface"""
         if stream is not None and not isinstance(stream, Sha1Writer):
             raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
         return super(LooseObjectDB, self).set_ostream(stream)
-
+
     def info(self, sha):
         m = self._map_loose_object(sha)
         try:
-            type, size = loose_object_header_info(m)
-            return OInfo(sha, type, size)
+            typ, size = loose_object_header_info(m)
+            return OInfo(sha, typ, size)
         finally:
             m.close()
         # END assure release of system resources
-
+
     def stream(self, sha):
         m = self._map_loose_object(sha)
         type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
         return OStream(sha, type, size, stream)
-
+
     def has_object(self, sha):
         try:
             self.readable_db_object_path(bin_to_hex(sha))
@@ -179,7 +174,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
         except BadObject:
             return False
         # END check existence
-
+
     def store(self, istream):
         """note: The sha we produce will be hex by nature"""
         tmp_path = None
@@ -187,20 +182,20 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
         if writer is None:
             # open a tmp file to write the data to
             fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
-
+
             if istream.binsha is None:
                 writer = FDCompressedSha1Writer(fd)
             else:
                 writer = FDStream(fd)
             # END handle direct stream copies
         # END handle custom writer
-
+
         try:
             try:
                 if istream.binsha is not None:
                     # copy as much as possible, the actual uncompressed item size might
                     # be smaller than the compressed version
-                    stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+                    stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
                 else:
                     # write object with header, we have to make a new one
                     write_object(istream.type, istream.size, istream.read, writer.write,
@@ -215,14 +210,14 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
                 os.remove(tmp_path)
             raise
         # END assure tmpfile removal on error
-
+
         hexsha = None
         if istream.binsha:
             hexsha = istream.hexsha
         else:
             hexsha = writer.sha(as_hex=True)
         # END handle sha
-
+
         if tmp_path:
             obj_path = self.db_path(self.object_path(hexsha))
             obj_dir = dirname(obj_path)
@@ -234,29 +229,28 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
                 remove(obj_path)
             # END handle win32
             rename(tmp_path, obj_path)
-
+
             # make sure it's readable for all! It started out as rw-- tmp file
             # but needs to be rwrr
             chmod(obj_path, self.new_objects_mode)
         # END handle dry_run
-
+
         istream.binsha = hex_to_bin(hexsha)
         return istream
-
+
     def sha_iter(self):
         # find all files which look like an object, extract sha from there
         for root, dirs, files in os.walk(self.root_path()):
             root_base = basename(root)
             if len(root_base) != 2:
                 continue
-
+
             for f in files:
                 if len(f) != 38:
                     continue
                 yield hex_to_bin(root_base + f)
             # END for each file
         # END for each walk iteration
-
+
     def size(self):
         return len(tuple(self.sha_iter()))
-
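Besides the import rewrite, loose.py picks up three genuine Python 3 fixes: octal literals (0444 is a SyntaxError on Python 3), the old "except OSError, e:" syntax, and sys.maxint, which no longer exists. A standalone check of the portable spellings (not gitdb code):

    import sys

    # int("444", 8) parses on Python 2 and 3; 0o444 would work on 2.6+ as well.
    assert int("444", 8) == 0o444 == 292      # r--r--r-- permission bits

    # A compat constant like gitdb's MAXSIZE (assumed definition) can be spelled:
    MAXSIZE = getattr(sys, "maxint", sys.maxsize)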
diff --git a/gitdb/db/mem.py b/gitdb/db/mem.py
index b9b2b89..1aa0d51 100644
--- a/gitdb/db/mem.py
+++ b/gitdb/db/mem.py
@@ -3,27 +3,28 @@
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Contains the MemoryDatabase implementation"""
-from loose import LooseObjectDB
-from base import (
-    ObjectDBR,
-    ObjectDBW
-    )
+from gitdb.db.loose import LooseObjectDB
+from gitdb.db.base import (
+    ObjectDBR,
+    ObjectDBW
+)

 from gitdb.base import (
-    OStream,
-    IStream,
-    )
+    OStream,
+    IStream,
+)

 from gitdb.exc import (
-    BadObject,
-    UnsupportedOperation
-    )
+    BadObject,
+    UnsupportedOperation
+)
+
 from gitdb.stream import (
-    ZippedStoreShaWriter,
-    DecompressMemMapReader,
-    )
+    ZippedStoreShaWriter,
+    DecompressMemMapReader,
+)

-from cStringIO import StringIO
+from io import BytesIO

 __all__ = ("MemoryDB", )
@@ -31,46 +32,40 @@ __all__ = ("MemoryDB", )
 class MemoryDB(ObjectDBR, ObjectDBW):
     """A memory database stores everything to memory, providing fast IO and object
     retrieval. It should be used to buffer results and obtain SHAs before writing
     it to the actual physical storage, as it allows querying whether an object already
-    exists in the target storage before introducing actual IO
-
-    **Note:** memory is currently not threadsafe, hence the async methods cannot be used
-        for storing"""
-
+    exists in the target storage before introducing actual IO"""
+
     def __init__(self):
         super(MemoryDB, self).__init__()
         self._db = LooseObjectDB("path/doesnt/matter")
-
+
         # maps 20 byte shas to their OStream objects
         self._cache = dict()
-
+
     def set_ostream(self, stream):
         raise UnsupportedOperation("MemoryDBs always stream into memory")
-
+
     def store(self, istream):
         zstream = ZippedStoreShaWriter()
         self._db.set_ostream(zstream)
-
+
         istream = self._db.store(istream)
         zstream.close()     # close to flush
         zstream.seek(0)
-
-        # don't provide a size, the stream is written in object format, hence the
+
+        # don't provide a size, the stream is written in object format, hence the
         # header needs decompression
-        decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+        decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
         self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
-
+
         return istream
-
-    def store_async(self, reader):
-        raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access")
-
+
     def has_object(self, sha):
         return sha in self._cache

     def info(self, sha):
         # we always return streams, which are infos as well
         return self.stream(sha)
-
+
     def stream(self, sha):
         try:
             ostream = self._cache[sha]
@@ -80,15 +75,18 @@ class MemoryDB(ObjectDBR, ObjectDBW):
         except KeyError:
             raise BadObject(sha)
         # END exception handling
-
+
     def size(self):
         return len(self._cache)
-
+
     def sha_iter(self):
-        return self._cache.iterkeys()
-
-
-    #{ Interface
+        try:
+            return self._cache.iterkeys()
+        except AttributeError:
+            return self._cache.keys()
+
+
+    #{ Interface
     def stream_copy(self, sha_iter, odb):
         """Copy the streams as identified by sha's yielded by sha_iter into the given odb
         The streams will be copied directly
@@ -100,12 +98,12 @@ class MemoryDB(ObjectDBR, ObjectDBW):
             if odb.has_object(sha):
                 continue
             # END check object existence
-
+
             ostream = self.stream(sha)
             # compressed data including header
-            sio = StringIO(ostream.stream.data())
+            sio = BytesIO(ostream.stream.data())
             istream = IStream(ostream.type, ostream.size, sio, sha)
-
+
             odb.store(istream)
             count += 1
         # END for each sha
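With cStringIO gone in Python 3, MemoryDB now buffers object data in io.BytesIO, and sha_iter() tolerates both dict.iterkeys() (py2) and dict.keys() (py3). A usage sketch of the buffer-then-query workflow the docstring describes; treating the object type as bytes is an assumption about the Python 3 API:

    from io import BytesIO
    from gitdb.db.mem import MemoryDB
    from gitdb.base import IStream

    mdb = MemoryDB()
    data = b"my data"
    # IStream(type, size, stream); the type is assumed to be bytes under Python 3
    istream = mdb.store(IStream(b"blob", len(data), BytesIO(data)))
    print(istream.hexsha)                  # sha known without touching disk
    assert mdb.has_object(istream.binsha)  # cheap existence check before real IO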
diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py
index 9287319..eaf431a 100644
--- a/gitdb/db/pack.py
+++ b/gitdb/db/pack.py
@@ -3,21 +3,24 @@
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Module containing a database to deal with packs"""
-from base import (
-    FileDBBase,
-    ObjectDBR,
-    CachingDB
-    )
+from gitdb.db.base import (
+    FileDBBase,
+    ObjectDBR,
+    CachingDB
+)

 from gitdb.util import LazyMixin

 from gitdb.exc import (
-    BadObject,
-    UnsupportedOperation,
-    AmbiguousObjectName
-    )
+    BadObject,
+    UnsupportedOperation,
+    AmbiguousObjectName
+)

 from gitdb.pack import PackEntity
+from gitdb.utils.compat import xrange
+
+from functools import reduce

 import os
 import glob
@@ -29,12 +32,12 @@ __all__ = ('PackedDB', )

 class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
     """A database operating on a set of object packs"""
-
+
     # sort the priority list every N queries
-    # Higher values are better, performance tests don't show this has
+    # Higher values are better, performance tests don't show this has
     # any effect, but it should have one
     _sort_interval = 500
-
+
     def __init__(self, root_path):
         super(PackedDB, self).__init__(root_path)
         # list of lists with three items:
@@ -44,29 +47,29 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
         # self._entities = list()       # lazy loaded list
         self._hit_count = 0             # amount of hits
         self._st_mtime = 0              # last modification data of our root path
-
+
     def _set_cache_(self, attr):
         if attr == '_entities':
             self._entities = list()
             self.update_cache(force=True)
         # END handle entities initialization
-
+
     def _sort_entities(self):
         self._entities.sort(key=lambda l: l[0], reverse=True)
-
+
     def _pack_info(self, sha):
         """:return: tuple(entity, index) for an item at the given sha
         :param sha: 20 or 40 byte sha
         :raise BadObject:
         **Note:** This method is not thread-safe, but may be hit in multi-threaded
-            operation. The worst thing that can happen though is a counter that
+            operation. The worst thing that can happen though is a counter that
            was not incremented, or the list being in wrong order. So we save
            the time for locking here, let's see how that goes"""
         # presort ?
         if self._hit_count % self._sort_interval == 0:
             self._sort_entities()
         # END update sorting
-
+
         for item in self._entities:
             index = item[2](sha)
             if index is not None:
@@ -75,14 +78,14 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
                 return (item[1], index)
             # END index found in pack
         # END for each item
-
+
         # no hit, see whether we have to update packs
         # NOTE: considering packs don't change very often, we save this call
         # and leave it to the super-caller to trigger that
         raise BadObject(sha)
-
-    #{ Object DB Read
-
+
+    #{ Object DB Read
+
     def has_object(self, sha):
         try:
             self._pack_info(sha)
@@ -90,17 +93,16 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
         except BadObject:
             return False
         # END exception handling
-
+
     def info(self, sha):
         entity, index = self._pack_info(sha)
         return entity.info_at_index(index)
-
+
     def stream(self, sha):
         entity, index = self._pack_info(sha)
         return entity.stream_at_index(index)
-
+
     def sha_iter(self):
-        sha_list = list()
         for entity in self.entities():
             index = entity.index()
             sha_by_index = index.sha
@@ -108,50 +110,46 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
                 yield sha_by_index(index)
             # END for each index
         # END for each entity
-
+
     def size(self):
         sizes = [item[1].index().size() for item in self._entities]
         return reduce(lambda x,y: x+y, sizes, 0)
-
+
     #} END object db read
-
+
     #{ object db write
-
+
     def store(self, istream):
-        """Storing individual objects is not feasible as a pack is designed to
+        """Storing individual objects is not feasible as a pack is designed to
         hold multiple objects. Writing or rewriting packs for single objects is
         inefficient"""
         raise UnsupportedOperation()
-
-    def store_async(self, reader):
-        # TODO: add ObjectDBRW before implementing this
-        raise NotImplementedError()
-
+
     #} END object db write
-
-
-    #{ Interface
-
+
+
+    #{ Interface
+
     def update_cache(self, force=False):
         """
-        Update our cache with the acutally existing packs on disk. Add new ones,
+        Update our cache with the actually existing packs on disk. Add new ones,
         and remove deleted ones. We keep the unchanged ones
-
+
         :param force: If True, the cache will be updated even though the directory
            does not appear to have changed according to its modification timestamp.
-        :return: True if the packs have been updated so there is new information,
+        :return: True if the packs have been updated so there is new information,
            False if there was no change to the pack database"""
         stat = os.stat(self.root_path())
         if not force and stat.st_mtime <= self._st_mtime:
             return False
         # END abort early on no change
         self._st_mtime = stat.st_mtime
-
+
         # packs are supposed to be prefixed with pack- by git-convention
         # get all pack files, figure out what changed
         pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
         our_pack_files = set(item[1].pack().path() for item in self._entities)
-
+
         # new packs
         for pack_file in (pack_files - our_pack_files):
             # init the hit-counter/priority with the size, a good measure for hit-
@@ -159,7 +157,7 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
             entity = PackEntity(pack_file)
             self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
         # END for each new packfile
-
+
         # removed packs
         for pack_file in (our_pack_files - pack_files):
             del_index = -1
@@ -172,22 +170,22 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
             assert del_index != -1
             del(self._entities[del_index])
         # END for each removed pack
-
+
         # reinitialize priorities
         self._sort_entities()
         return True
-
+
     def entities(self):
         """:return: list of pack entities operated upon by this database"""
         return [ item[1] for item in self._entities ]
-
+
     def partial_to_complete_sha(self, partial_binsha, canonical_length):
         """:return: 20 byte sha as inferred by the given partial binary sha
-        :param partial_binsha: binary sha with less than 20 bytes
+        :param partial_binsha: binary sha with less than 20 bytes
         :param canonical_length: length of the corresponding canonical representation.
            It is required as binary sha's cannot display whether the original hex sha
            had an odd or even number of characters
-        :raise AmbiguousObjectName:
+        :raise AmbiguousObjectName:
         :raise BadObject: """
         candidate = None
         for item in self._entities:
@@ -199,11 +197,11 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
                 candidate = sha
             # END handle full sha could be found
         # END for each entity
-
+
         if candidate:
             return candidate
-
+
         # still not found ?
         raise BadObject(partial_binsha)
-
+
     #} END interface
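pack.py mirrors the same porting pattern (absolute imports, functools.reduce, an xrange shim from gitdb.utils.compat) and drops its unimplemented store_async() stub. One subtlety worth spelling out is the canonical_length parameter of partial_to_complete_sha(): an odd-length hex prefix must be padded before it can be packed into bytes, so the byte form alone cannot distinguish a 3-digit prefix from its padded 4-digit form. A small standalone illustration:

    from gitdb.util import hex_to_bin

    prefix = "abc"                              # 3 hex digits -- half a byte short
    partial_binsha = hex_to_bin(prefix + "0")   # pad, as CompoundDB does
    canonical_length = len(prefix)              # 3; tells the matcher that the last
                                                # nibble of the padded byte is noise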
diff --git a/gitdb/db/ref.py b/gitdb/db/ref.py
index 0a28b9e..d989126 100644
--- a/gitdb/db/ref.py
+++ b/gitdb/db/ref.py
@@ -2,25 +2,24 @@
 #
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
-from base import (
-    CompoundDB,
-    )
+from gitdb.db.base import (
+    CompoundDB,
+)

-import os

 __all__ = ('ReferenceDB', )

 class ReferenceDB(CompoundDB):
     """A database consisting of databases referred to in a file"""
-
+
     # Configuration
     # Specifies the object database to use for the paths found in the alternates
     # file. If None, it defaults to the GitDB
     ObjectDBCls = None
-
+
     def __init__(self, ref_file):
         super(ReferenceDB, self).__init__()
         self._ref_file = ref_file
-
+
     def _set_cache_(self, attr):
         if attr == '_dbs':
             self._dbs = list()
@@ -28,15 +27,15 @@ class ReferenceDB(CompoundDB):
         else:
             super(ReferenceDB, self)._set_cache_(attr)
         # END handle attrs
-
+
     def _update_dbs_from_ref_file(self):
         dbcls = self.ObjectDBCls
         if dbcls is None:
             # late import
-            from git import GitDB
+            from gitdb.db.git import GitDB
             dbcls = GitDB
         # END get db type
-
+
         # try to get as many as possible, don't fail if some are unavailable
         ref_paths = list()
         try:
@@ -44,10 +43,10 @@ class ReferenceDB(CompoundDB):
         except (OSError, IOError):
             pass
         # END handle alternates
-
+
         ref_paths_set = set(ref_paths)
         cur_ref_paths_set = set(db.root_path() for db in self._dbs)
-
+
         # remove existing
         for path in (cur_ref_paths_set - ref_paths_set):
             for i, db in enumerate(self._dbs[:]):
@@ -56,7 +55,7 @@ class ReferenceDB(CompoundDB):
                 continue
                 # END del matching db
             # END for each path to remove
-
+
         # add new
         # sort them to maintain order
         added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
@@ -68,11 +67,11 @@ class ReferenceDB(CompoundDB):
                     db.databases()
                 # END verification
                 self._dbs.append(db)
-            except Exception as e:
+            except Exception:
                 # ignore invalid paths or issues
                 pass
         # END for each path to add
-
+
     def update_cache(self, force=False):
         # re-read alternates and update databases
         self._update_dbs_from_ref_file()
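ReferenceDB's late import of GitDB also becomes absolute, and the unused exception binding ("except Exception as e:") is dropped since the handler never used e. The file it parses is git's standard alternates mechanism: .git/objects/info/alternates lists one objects directory per line, and each becomes a database that lookups fall through to. A sketch of such a reader, assuming plain UTF-8 text (gitdb's actual parsing may differ):

    import os

    def read_alternates(alternates_file):
        """Return the object directory paths listed in an alternates file."""
        if not os.path.isfile(alternates_file):
            return []
        with open(alternates_file, "rb") as fp:
            return [line.strip().decode("utf-8") for line in fp if line.strip()]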