summaryrefslogtreecommitdiff
path: root/gitdb/test/performance
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2015-01-01 16:00:55 +0100
committerSebastian Thiel <byronimo@gmail.com>2015-01-01 16:01:35 +0100
commit0d22c80e041dbb5d9d985926b39b7bd7a0573a7a (patch)
tree98c8113e64616a61b87db29263e43c7b99d45b1a /gitdb/test/performance
parent6b32bbcc0b9ca142fc3b066fcd0d76e2a731423d (diff)
downloadgitdb-0d22c80e041dbb5d9d985926b39b7bd7a0573a7a.tar.gz
Added integrity test for loose objects to search large datasets for
the issue described in https://github.com/gitpython-developers/GitPython/issues/220. See test notes for proper usage; it all depends on a useful dataset with high entropy.
Diffstat (limited to 'gitdb/test/performance')
-rw-r--r--gitdb/test/performance/test_pack.py31
1 file changed, 31 insertions, 0 deletions
diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py
index 97c450d..d54a74c 100644
--- a/gitdb/test/performance/test_pack.py
+++ b/gitdb/test/performance/test_pack.py
@@ -9,6 +9,11 @@ from gitdb.test.performance.lib import (
TestBigRepoR
)
+from gitdb import (
+ MemoryDB,
+ IStream,
+)
+from gitdb.typ import str_blob_type
from gitdb.exc import UnsupportedOperation
from gitdb.db.pack import PackedDB
from gitdb.utils.compat import xrange
@@ -71,6 +76,32 @@ class TestPackedDBPerformance(TestBigRepoR):
print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr)
@skip_on_travis_ci
+ def test_loose_correctness(self):
+ """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
+ into the loose object db (memory).
+ This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
+ faster
+ :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
+ data files, like archives."""
+ pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
+ mdb = MemoryDB()
+ for c, sha in enumerate(pdb.sha_iter()):
+ ostream = pdb.stream(sha)
+ # the issue only showed on larger files which are hardly compressible ...
+ if ostream.type != str_blob_type:
+ continue
+ istream = IStream(ostream.type, ostream.size, ostream.stream)
+ mdb.store(istream)
+ assert istream.binsha == sha
+ # this can fail ... sometimes, so the packs dataset should be huge
+ assert len(mdb.stream(sha).read()) == ostream.size
+
+ if c and c % 1000 == 0:
+ print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
+ mdb._cache.clear()
+ # end for each sha to copy
+
+ @skip_on_travis_ci
def test_correctness(self):
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
# disabled for now as it used to work perfectly, checking big repositories takes a long time