diff options
Diffstat (limited to 'gitdb/db/pack.py')
| -rw-r--r-- | gitdb/db/pack.py | 104 |
1 files changed, 51 insertions, 53 deletions
diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py index 9287319..eaf431a 100644 --- a/gitdb/db/pack.py +++ b/gitdb/db/pack.py @@ -3,21 +3,24 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module containing a database to deal with packs""" -from base import ( - FileDBBase, - ObjectDBR, - CachingDB - ) +from gitdb.db.base import ( + FileDBBase, + ObjectDBR, + CachingDB +) from gitdb.util import LazyMixin from gitdb.exc import ( - BadObject, - UnsupportedOperation, - AmbiguousObjectName - ) + BadObject, + UnsupportedOperation, + AmbiguousObjectName +) from gitdb.pack import PackEntity +from gitdb.utils.compat import xrange + +from functools import reduce import os import glob @@ -29,12 +32,12 @@ __all__ = ('PackedDB', ) class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): """A database operating on a set of object packs""" - + # sort the priority list every N queries - # Higher values are better, performance tests don't show this has + # Higher values are better, performance tests don't show this has # any effect, but it should have one _sort_interval = 500 - + def __init__(self, root_path): super(PackedDB, self).__init__(root_path) # list of lists with three items: @@ -44,29 +47,29 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): # self._entities = list() # lazy loaded list self._hit_count = 0 # amount of hits self._st_mtime = 0 # last modification data of our root path - + def _set_cache_(self, attr): if attr == '_entities': self._entities = list() self.update_cache(force=True) # END handle entities initialization - + def _sort_entities(self): self._entities.sort(key=lambda l: l[0], reverse=True) - + def _pack_info(self, sha): """:return: tuple(entity, index) for an item at the given sha :param sha: 20 or 40 byte sha :raise BadObject: **Note:** This method is not thread-safe, but may be hit in multi-threaded - operation. The worst thing that can happen though is a counter that + operation. The worst thing that can happen though is a counter that was not incremented, or the list being in wrong order. So we safe the time for locking here, lets see how that goes""" # presort ? if self._hit_count % self._sort_interval == 0: self._sort_entities() # END update sorting - + for item in self._entities: index = item[2](sha) if index is not None: @@ -75,14 +78,14 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): return (item[1], index) # END index found in pack # END for each item - + # no hit, see whether we have to update packs # NOTE: considering packs don't change very often, we safe this call # and leave it to the super-caller to trigger that raise BadObject(sha) - - #{ Object DB Read - + + #{ Object DB Read + def has_object(self, sha): try: self._pack_info(sha) @@ -90,17 +93,16 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): except BadObject: return False # END exception handling - + def info(self, sha): entity, index = self._pack_info(sha) return entity.info_at_index(index) - + def stream(self, sha): entity, index = self._pack_info(sha) return entity.stream_at_index(index) - + def sha_iter(self): - sha_list = list() for entity in self.entities(): index = entity.index() sha_by_index = index.sha @@ -108,50 +110,46 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): yield sha_by_index(index) # END for each index # END for each entity - + def size(self): sizes = [item[1].index().size() for item in self._entities] return reduce(lambda x,y: x+y, sizes, 0) - + #} END object db read - + #{ object db write - + def store(self, istream): - """Storing individual objects is not feasible as a pack is designed to + """Storing individual objects is not feasible as a pack is designed to hold multiple objects. Writing or rewriting packs for single objects is inefficient""" raise UnsupportedOperation() - - def store_async(self, reader): - # TODO: add ObjectDBRW before implementing this - raise NotImplementedError() - + #} END object db write - - - #{ Interface - + + + #{ Interface + def update_cache(self, force=False): """ - Update our cache with the acutally existing packs on disk. Add new ones, + Update our cache with the acutally existing packs on disk. Add new ones, and remove deleted ones. We keep the unchanged ones - + :param force: If True, the cache will be updated even though the directory does not appear to have changed according to its modification timestamp. - :return: True if the packs have been updated so there is new information, + :return: True if the packs have been updated so there is new information, False if there was no change to the pack database""" stat = os.stat(self.root_path()) if not force and stat.st_mtime <= self._st_mtime: return False # END abort early on no change self._st_mtime = stat.st_mtime - + # packs are supposed to be prefixed with pack- by git-convention # get all pack files, figure out what changed pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) our_pack_files = set(item[1].pack().path() for item in self._entities) - + # new packs for pack_file in (pack_files - our_pack_files): # init the hit-counter/priority with the size, a good measure for hit- @@ -159,7 +157,7 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): entity = PackEntity(pack_file) self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) # END for each new packfile - + # removed packs for pack_file in (our_pack_files - pack_files): del_index = -1 @@ -172,22 +170,22 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): assert del_index != -1 del(self._entities[del_index]) # END for each removed pack - + # reinitialize prioritiess self._sort_entities() return True - + def entities(self): """:return: list of pack entities operated upon by this database""" return [ item[1] for item in self._entities ] - + def partial_to_complete_sha(self, partial_binsha, canonical_length): """:return: 20 byte sha as inferred by the given partial binary sha - :param partial_binsha: binary sha with less than 20 bytes + :param partial_binsha: binary sha with less than 20 bytes :param canonical_length: length of the corresponding canonical representation. It is required as binary sha's cannot display whether the original hex sha had an odd or even number of characters - :raise AmbiguousObjectName: + :raise AmbiguousObjectName: :raise BadObject: """ candidate = None for item in self._entities: @@ -199,11 +197,11 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): candidate = sha # END handle full sha could be found # END for each entity - + if candidate: return candidate - + # still not found ? raise BadObject(partial_binsha) - + #} END interface |
