| author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-01 16:00:55 +0100 |
|---|---|---|
| committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-01 16:01:35 +0100 |
| commit | 0d22c80e041dbb5d9d985926b39b7bd7a0573a7a (patch) | |
| tree | 98c8113e64616a61b87db29263e43c7b99d45b1a /gitdb/test/performance | |
| parent | 6b32bbcc0b9ca142fc3b066fcd0d76e2a731423d (diff) | |
| download | gitdb-0d22c80e041dbb5d9d985926b39b7bd7a0573a7a.tar.gz | |
Added an integrity test for loose objects that searches large datasets for
the issue described in https://github.com/gitpython-developers/GitPython/issues/220.
See the test notes for proper usage; it all depends on a useful dataset with high entropy.
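As a hedged sketch of "proper usage", one might drive just this test from Python as below. The nose-style test address and the `GITDB_TEST_GIT_REPO_BASE` environment variable name are assumptions based on the test tooling of the era, not taken from the commit; check gitdb/test/performance/lib.py for the variable the harness actually reads.

```python
# Hypothetical invocation sketch -- NOT part of the commit itself.
# Assumptions: nose is the test runner, and GITDB_TEST_GIT_REPO_BASE is the
# variable the performance lib reads to locate the read-only test repository.
import os
import subprocess

env = dict(os.environ)
# placeholder path: should point at a large repository with high-entropy blobs
env["GITDB_TEST_GIT_REPO_BASE"] = "/path/to/large-high-entropy-repo/.git"  # assumed name

subprocess.check_call(
    ["python", "-m", "nose",
     "gitdb/test/performance/test_pack.py:TestPackedDBPerformance.test_loose_correctness"],
    env=env,
)
```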
Diffstat (limited to 'gitdb/test/performance')
| -rw-r--r-- | gitdb/test/performance/test_pack.py | 31 |
1 file changed, 31 insertions, 0 deletions
diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py
index 97c450d..d54a74c 100644
--- a/gitdb/test/performance/test_pack.py
+++ b/gitdb/test/performance/test_pack.py
@@ -9,6 +9,11 @@
 from gitdb.test.performance.lib import (
     TestBigRepoR
 )
+from gitdb import (
+    MemoryDB,
+    IStream,
+)
+from gitdb.typ import str_blob_type
 from gitdb.exc import UnsupportedOperation
 from gitdb.db.pack import PackedDB
 from gitdb.utils.compat import xrange
@@ -71,6 +76,32 @@ class TestPackedDBPerformance(TestBigRepoR):
         print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr)
 
     @skip_on_travis_ci
+    def test_loose_correctness(self):
+        """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
+        into the loose object db (memory).
+        This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
+        faster
+        :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
+        data files, like archives."""
+        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
+        mdb = MemoryDB()
+        for c, sha in enumerate(pdb.sha_iter()):
+            ostream = pdb.stream(sha)
+            # the issue only showed on larger files which are hardly compressible ...
+            if ostream.type != str_blob_type:
+                continue
+            istream = IStream(ostream.type, ostream.size, ostream.stream)
+            mdb.store(istream)
+            assert istream.binsha == sha
+            # this can fail ... sometimes, so the packs dataset should be huge
+            assert len(mdb.stream(sha).read()) == ostream.size
+
+            if c and c % 1000 == 0:
+                print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
+            mdb._cache.clear()
+        # end for each sha to copy
+
+    @skip_on_travis_ci
     def test_correctness(self):
         pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
         # disabled for now as it used to work perfectly, checking big repositories takes a long time
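To make the round-trip explicit outside the test harness, here is a minimal standalone sketch built only from the API the diff itself imports (PackedDB, MemoryDB, IStream, str_blob_type). The `pack_dir` path is a placeholder; the rest mirrors the test body: each blob is recompressed into the in-memory loose object DB and read back, so a latent compression/decompression bug of the kind reported in GitPython issue 220 surfaces as a failed assertion.

```python
# Minimal standalone sketch of the round-trip check the test performs,
# assuming the gitdb API exactly as imported in the diff above.
import sys

from gitdb import MemoryDB, IStream
from gitdb.db.pack import PackedDB
from gitdb.typ import str_blob_type

pack_dir = "/path/to/repo/.git/objects/pack"  # placeholder: a large, high-entropy repo

pdb = PackedDB(pack_dir)
mdb = MemoryDB()
for count, sha in enumerate(pdb.sha_iter()):
    ostream = pdb.stream(sha)
    if ostream.type != str_blob_type:
        continue  # the original issue only surfaced on large, poorly compressible blobs
    # store() recompresses the object into the in-memory loose DB ...
    istream = IStream(ostream.type, ostream.size, ostream.stream)
    mdb.store(istream)
    assert istream.binsha == sha, "sha changed across the store cycle"
    # ... and reading it back decompresses it again; a corrupt cycle fails here
    assert len(mdb.stream(sha).read()) == ostream.size, "size mismatch on read-back"
    mdb._cache.clear()  # keep memory flat across huge datasets
    if count and count % 1000 == 0:
        print("verified %i objects" % count, file=sys.stderr)
```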
