summaryrefslogtreecommitdiff
path: root/gitdb/db/pack.py
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2014-11-14 12:45:19 +0100
committer: Sebastian Thiel <byronimo@gmail.com> 2014-11-14 12:45:19 +0100
commit: 2f2fe4eea8ba4f47e63a7392a1f27f74f5ee925d (patch)
tree: 176a493d114fab7cc6e930bf318b2339db386cf5 /gitdb/db/pack.py
parent: 81707c606b88e971cc359e3e9f3abeeea2204860 (diff)
parent: 0dcec5a27b341ce58e5ab169f91aa25b2cafec0c (diff)
download: gitdb-0.6.0.tar.gz
Merge branch 'py2n3'0.6.0
* python 3 compatibility * all tests work in py2.6, 2.7, 3.3, 3.4
Diffstat (limited to 'gitdb/db/pack.py')
-rw-r--r--gitdb/db/pack.py104
1 files changed, 51 insertions, 53 deletions
diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py
index 9287319..eaf431a 100644
--- a/gitdb/db/pack.py
+++ b/gitdb/db/pack.py
@@ -3,21 +3,24 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing a database to deal with packs"""
-from base import (
- FileDBBase,
- ObjectDBR,
- CachingDB
- )
+from gitdb.db.base import (
+ FileDBBase,
+ ObjectDBR,
+ CachingDB
+)
from gitdb.util import LazyMixin
from gitdb.exc import (
- BadObject,
- UnsupportedOperation,
- AmbiguousObjectName
- )
+ BadObject,
+ UnsupportedOperation,
+ AmbiguousObjectName
+)
from gitdb.pack import PackEntity
+from gitdb.utils.compat import xrange
+
+from functools import reduce
import os
import glob
@@ -29,12 +32,12 @@ __all__ = ('PackedDB', )
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
"""A database operating on a set of object packs"""
-
+
# sort the priority list every N queries
- # Higher values are better, performance tests don't show this has
+ # Higher values are better, performance tests don't show this has
# any effect, but it should have one
_sort_interval = 500
-
+
def __init__(self, root_path):
super(PackedDB, self).__init__(root_path)
# list of lists with three items:
@@ -44,29 +47,29 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
# self._entities = list() # lazy loaded list
self._hit_count = 0 # amount of hits
self._st_mtime = 0 # last modification date of our root path
-
+
def _set_cache_(self, attr):
if attr == '_entities':
self._entities = list()
self.update_cache(force=True)
# END handle entities initialization
-
+
def _sort_entities(self):
self._entities.sort(key=lambda l: l[0], reverse=True)
-
+
def _pack_info(self, sha):
""":return: tuple(entity, index) for an item at the given sha
:param sha: 20 or 40 byte sha
:raise BadObject:
**Note:** This method is not thread-safe, but may be hit in multi-threaded
- operation. The worst thing that can happen though is a counter that
+ operation. The worst thing that can happen though is a counter that
was not incremented, or the list being in wrong order. So we save
the time for locking here, let's see how that goes"""
# presort ?
if self._hit_count % self._sort_interval == 0:
self._sort_entities()
# END update sorting
-
+
for item in self._entities:
index = item[2](sha)
if index is not None:
@@ -75,14 +78,14 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
return (item[1], index)
# END index found in pack
# END for each item
-
+
# no hit, see whether we have to update packs
# NOTE: considering packs don't change very often, we save this call
# and leave it to the super-caller to trigger that
raise BadObject(sha)
-
- #{ Object DB Read
-
+
+ #{ Object DB Read
+
def has_object(self, sha):
try:
self._pack_info(sha)
@@ -90,17 +93,16 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
except BadObject:
return False
# END exception handling
-
+
def info(self, sha):
entity, index = self._pack_info(sha)
return entity.info_at_index(index)
-
+
def stream(self, sha):
entity, index = self._pack_info(sha)
return entity.stream_at_index(index)
-
+
def sha_iter(self):
- sha_list = list()
for entity in self.entities():
index = entity.index()
sha_by_index = index.sha
@@ -108,50 +110,46 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
yield sha_by_index(index)
# END for each index
# END for each entity
-
+
def size(self):
sizes = [item[1].index().size() for item in self._entities]
return reduce(lambda x,y: x+y, sizes, 0)
-
+
#} END object db read
-
+
#{ object db write
-
+
def store(self, istream):
- """Storing individual objects is not feasible as a pack is designed to
+ """Storing individual objects is not feasible as a pack is designed to
hold multiple objects. Writing or rewriting packs for single objects is
inefficient"""
raise UnsupportedOperation()
-
- def store_async(self, reader):
- # TODO: add ObjectDBRW before implementing this
- raise NotImplementedError()
-
+
#} END object db write
-
-
- #{ Interface
-
+
+
+ #{ Interface
+
def update_cache(self, force=False):
"""
- Update our cache with the actually existing packs on disk. Add new ones,
+ Update our cache with the actually existing packs on disk. Add new ones,
and remove deleted ones. We keep the unchanged ones
-
+
:param force: If True, the cache will be updated even though the directory
does not appear to have changed according to its modification timestamp.
- :return: True if the packs have been updated so there is new information,
+ :return: True if the packs have been updated so there is new information,
False if there was no change to the pack database"""
stat = os.stat(self.root_path())
if not force and stat.st_mtime <= self._st_mtime:
return False
# END abort early on no change
self._st_mtime = stat.st_mtime
-
+
# packs are supposed to be prefixed with pack- by git-convention
# get all pack files, figure out what changed
pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
our_pack_files = set(item[1].pack().path() for item in self._entities)
-
+
# new packs
for pack_file in (pack_files - our_pack_files):
# init the hit-counter/priority with the size, a good measure for hit-
@@ -159,7 +157,7 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
entity = PackEntity(pack_file)
self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
# END for each new packfile
-
+
# removed packs
for pack_file in (our_pack_files - pack_files):
del_index = -1
@@ -172,22 +170,22 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
assert del_index != -1
del(self._entities[del_index])
# END for each removed pack
-
+
# reinitialize priorities
self._sort_entities()
return True
-
+
def entities(self):
""":return: list of pack entities operated upon by this database"""
return [ item[1] for item in self._entities ]
-
+
def partial_to_complete_sha(self, partial_binsha, canonical_length):
""":return: 20 byte sha as inferred by the given partial binary sha
- :param partial_binsha: binary sha with less than 20 bytes
+ :param partial_binsha: binary sha with less than 20 bytes
:param canonical_length: length of the corresponding canonical representation.
It is required as binary sha's cannot display whether the original hex sha
had an odd or even number of characters
- :raise AmbiguousObjectName:
+ :raise AmbiguousObjectName:
:raise BadObject: """
candidate = None
for item in self._entities:
@@ -199,11 +197,11 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
candidate = sha
# END handle full sha could be found
# END for each entity
-
+
if candidate:
return candidate
-
+
# still not found ?
raise BadObject(partial_binsha)
-
+
#} END interface