| author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-01 16:00:55 +0100 |
|---|---|---|
| committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-01 16:01:35 +0100 |
| commit | 0d22c80e041dbb5d9d985926b39b7bd7a0573a7a (patch) | |
| tree | 98c8113e64616a61b87db29263e43c7b99d45b1a /gitdb/test/performance | |
| parent | 6b32bbcc0b9ca142fc3b066fcd0d76e2a731423d (diff) | |
| download | gitdb-0d22c80e041dbb5d9d985926b39b7bd7a0573a7a.tar.gz | |
Added an integrity test for loose objects that searches large datasets for
the issue described in https://github.com/gitpython-developers/GitPython/issues/220.
See the test notes for proper usage; it all depends on a useful dataset with high entropy.
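As a hedged sketch of "proper usage", one might drive just this test from Python as below. The nose-style test address and the `GITDB_TEST_GIT_REPO_BASE` environment variable name are assumptions based on the test tooling of the era, not taken from the commit; check gitdb/test/performance/lib.py for the variable the harness actually reads.

```python
# Hypothetical invocation sketch -- NOT part of the commit itself.
# Assumptions: nose is the test runner, and GITDB_TEST_GIT_REPO_BASE is the
# variable the performance lib reads to locate the read-only test repository.
import os
import subprocess

env = dict(os.environ)
# placeholder path: should point at a large repository with high-entropy blobs
env["GITDB_TEST_GIT_REPO_BASE"] = "/path/to/large-high-entropy-repo/.git"  # assumed name

subprocess.check_call(
    ["python", "-m", "nose",
     "gitdb/test/performance/test_pack.py:TestPackedDBPerformance.test_loose_correctness"],
    env=env,
)
```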
Diffstat (limited to 'gitdb/test/performance')
| -rw-r--r-- | gitdb/test/performance/test_pack.py | 31 |
1 file changed, 31 insertions, 0 deletions
diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py
index 97c450d..d54a74c 100644
--- a/gitdb/test/performance/test_pack.py
+++ b/gitdb/test/performance/test_pack.py
@@ -9,6 +9,11 @@
 from gitdb.test.performance.lib import (
     TestBigRepoR
 )
+from gitdb import (
+    MemoryDB,
+    IStream,
+)
+from gitdb.typ import str_blob_type
 from gitdb.exc import UnsupportedOperation
 from gitdb.db.pack import PackedDB
 from gitdb.utils.compat import xrange
@@ -71,6 +76,32 @@ class TestPackedDBPerformance(TestBigRepoR):
         print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr)
 
     @skip_on_travis_ci
+    def test_loose_correctness(self):
+        """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
+        into the loose object db (memory).
+        This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
+        faster
+        :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
+        data files, like archives."""
+        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
+        mdb = MemoryDB()
+        for c, sha in enumerate(pdb.sha_iter()):
+            ostream = pdb.stream(sha)
+            # the issue only showed on larger files which are hardly compressible ...
+            if ostream.type != str_blob_type:
+                continue
+            istream = IStream(ostream.type, ostream.size, ostream.stream)
+            mdb.store(istream)
+            assert istream.binsha == sha
+            # this can fail ... sometimes, so the packs dataset should be huge
+            assert len(mdb.stream(sha).read()) == ostream.size
+
+            if c and c % 1000 == 0:
+                print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
+            mdb._cache.clear()
+        # end for each sha to copy
+
+    @skip_on_travis_ci
     def test_correctness(self):
         pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
         # disabled for now as it used to work perfectly, checking big repositories takes a long time
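To make the round-trip explicit outside the test harness, here is a minimal standalone sketch built only from the API the diff itself imports (PackedDB, MemoryDB, IStream, str_blob_type). The `pack_dir` path is a placeholder; the rest mirrors the test body: each blob is recompressed into the in-memory loose object DB and read back, so a latent compression/decompression bug of the kind reported in GitPython issue 220 surfaces as a failed assertion.

```python
# Minimal standalone sketch of the round-trip check the test performs,
# assuming the gitdb API exactly as imported in the diff above.
import sys

from gitdb import MemoryDB, IStream
from gitdb.db.pack import PackedDB
from gitdb.typ import str_blob_type

pack_dir = "/path/to/repo/.git/objects/pack"  # placeholder: a large, high-entropy repo

pdb = PackedDB(pack_dir)
mdb = MemoryDB()
for count, sha in enumerate(pdb.sha_iter()):
    ostream = pdb.stream(sha)
    if ostream.type != str_blob_type:
        continue  # the original issue only surfaced on large, poorly compressible blobs
    # store() recompresses the object into the in-memory loose DB ...
    istream = IStream(ostream.type, ostream.size, ostream.stream)
    mdb.store(istream)
    assert istream.binsha == sha, "sha changed across the store cycle"
    # ... and reading it back decompresses it again; a corrupt cycle fails here
    assert len(mdb.stream(sha).read()) == ostream.size, "size mismatch on read-back"
    mdb._cache.clear()  # keep memory flat across huge datasets
    if count and count % 1000 == 0:
        print("verified %i objects" % count, file=sys.stderr)
```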
