Diffstat (limited to 'gitdb/test/performance')
-rw-r--r--  gitdb/test/performance/__init__.py            |   1
-rw-r--r--  gitdb/test/performance/lib.py                 |  31
-rw-r--r--  gitdb/test/performance/test_pack.py           |  28
-rw-r--r--  gitdb/test/performance/test_pack_streaming.py |  21
-rw-r--r--  gitdb/test/performance/test_stream.py         | 126
5 files changed, 59 insertions(+), 148 deletions(-)
diff --git a/gitdb/test/performance/__init__.py b/gitdb/test/performance/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/gitdb/test/performance/__init__.py
@@ -0,0 +1 @@
+
diff --git a/gitdb/test/performance/lib.py b/gitdb/test/performance/lib.py
index 3563fcf..ec45cf3 100644
--- a/gitdb/test/performance/lib.py
+++ b/gitdb/test/performance/lib.py
@@ -4,9 +4,8 @@
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Contains library functions"""
 import os
-from gitdb.test.lib import *
-import shutil
-import tempfile
+import logging
+from gitdb.test.lib import TestBase
 
 #{ Invariants
 
@@ -14,17 +13,6 @@ k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE"
 
 #} END invariants
 
-#{ Utilities
-def resolve_or_fail(env_var):
-    """:return: resolved environment variable or raise EnvironmentError"""
-    try:
-        return os.environ[env_var]
-    except KeyError:
-        raise EnvironmentError("Please set the %r envrionment variable and retry" % env_var)
-    # END exception handling
-
-#} END utilities
-
 #{ Base Classes
 
@@ -41,14 +29,19 @@ class TestBigRepoR(TestBase):
     head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
     #} END invariants
 
-    @classmethod
-    def setUpAll(cls):
+    def setUp(self):
         try:
-            super(TestBigRepoR, cls).setUpAll()
+            super(TestBigRepoR, self).setUp()
         except AttributeError:
             pass
-        cls.gitrepopath = resolve_or_fail(k_env_git_repo)
-        assert cls.gitrepopath.endswith('.git')
+
+        self.gitrepopath = os.environ.get(k_env_git_repo)
+        if not self.gitrepopath:
+            logging.info("You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", k_env_git_repo)
+            ospd = os.path.dirname
+            self.gitrepopath = os.path.join(ospd(ospd(ospd(ospd(__file__)))), '.git')
+        # end assure gitrepo is set
+        assert self.gitrepopath.endswith('.git')
 
 #} END base classes
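The new setUp above prefers the GITDB_TEST_GIT_REPO_BASE environment variable and otherwise falls back to the gitdb repository's own .git directory. A minimal standalone sketch of that resolution logic, using a hypothetical resolve_repo_path helper (the four dirname calls assume, as in the diff, that the module lives at gitdb/test/performance/lib.py inside the checkout):

    import logging
    import os

    k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE"

    def resolve_repo_path():
        """Return a .git directory to benchmark against (sketch)."""
        path = os.environ.get(k_env_git_repo)
        if not path:
            logging.info("%s is not set - defaulting to the gitdb repository", k_env_git_repo)
            ospd = os.path.dirname
            # climb four levels: performance/ -> test/ -> gitdb/ -> repository root
            path = os.path.join(ospd(ospd(ospd(ospd(__file__)))), '.git')
        assert path.endswith('.git')
        return path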
diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py
index 63856e2..db3b48d 100644
--- a/gitdb/test/performance/test_pack.py
+++ b/gitdb/test/performance/test_pack.py
@@ -3,22 +3,24 @@
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Performance tests for object store"""
-from lib import (
+from __future__ import print_function
+
+from gitdb.test.performance.lib import (
     TestBigRepoR
-    )
+)
 
 from gitdb.exc import UnsupportedOperation
 from gitdb.db.pack import PackedDB
+from gitdb.utils.compat import xrange
+from gitdb.test.lib import skip_on_travis_ci
 
 import sys
 import os
 from time import time
-import random
-
-from nose import SkipTest
 
 
 class TestPackedDBPerformance(TestBigRepoR):
-
+
+    @skip_on_travis_ci
     def test_pack_random_access(self):
         pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
 
@@ -27,7 +29,7 @@ class TestPackedDBPerformance(TestBigRepoR):
         sha_list = list(pdb.sha_iter())
         elapsed = time() - st
         ns = len(sha_list)
-        print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
+        print("PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed), file=sys.stderr)
 
         # sha lookup: best-case and worst case access
         pdb_pack_info = pdb._pack_info
@@ -41,7 +43,7 @@ class TestPackedDBPerformance(TestBigRepoR):
             # discard cache
             del(pdb._entities)
             pdb.entities()
-            print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
+            print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr)
         # END for each random mode
 
         # query info and streams only
@@ -51,7 +53,7 @@ class TestPackedDBPerformance(TestBigRepoR):
             for sha in sha_list[:max_items]:
                 pdb_fun(sha)
             elapsed = time() - st
-            print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
+            print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr)
         # END for each function
 
         # retrieve stream and read all
@@ -65,13 +67,13 @@ class TestPackedDBPerformance(TestBigRepoR):
             total_size += stream.size
         elapsed = time() - st
         total_kib = total_size / 1000
-        print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
+        print("PDB: Obtained %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib / elapsed, elapsed, max_items / elapsed), file=sys.stderr)
 
+    @skip_on_travis_ci
     def test_correctness(self):
-        raise SkipTest("Takes too long, enable it if you change the algorithm and want to be sure you decode packs correctly")
         pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
         # disabled for now as it used to work perfectly, checking big repositories takes a long time
-        print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
+        print("Endurance run: verify streaming of objects (crc and sha)", file=sys.stderr)
         for crc in range(2):
             count = 0
             st = time()
@@ -88,6 +90,6 @@ class TestPackedDBPerformance(TestBigRepoR):
                 # END for each index
             # END for each entity
             elapsed = time() - st
-            print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
+            print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed), file=sys.stderr)
         # END for each verify mode
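The @skip_on_travis_ci decorator imported from gitdb.test.lib replaces the old unconditional nose.SkipTest raises, so the expensive tests still run locally but are skipped on CI. Its implementation is not part of this diff; a sketch of what such a decorator plausibly looks like, keyed to the TRAVIS environment variable that Travis CI sets in every build:

    import os
    from functools import wraps
    from unittest import SkipTest

    def skip_on_travis_ci(func):
        """Skip the decorated test method when running on Travis CI (sketch)."""
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            if 'TRAVIS' in os.environ:
                raise SkipTest("Heavy performance test skipped on Travis CI")
            return func(self, *args, **kwargs)
        return wrapper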
diff --git a/gitdb/test/performance/test_pack_streaming.py b/gitdb/test/performance/test_pack_streaming.py
index c66e60c..fe160ea 100644
--- a/gitdb/test/performance/test_pack_streaming.py
+++ b/gitdb/test/performance/test_pack_streaming.py
@@ -3,18 +3,20 @@
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Specific test for pack streams only"""
-from lib import (
+from __future__ import print_function
+
+from gitdb.test.performance.lib import (
     TestBigRepoR
-    )
+)
 
 from gitdb.db.pack import PackedDB
 from gitdb.stream import NullStream
 from gitdb.pack import PackEntity
+from gitdb.test.lib import skip_on_travis_ci
 
 import os
 import sys
 from time import time
-from nose import SkipTest
 
 
 class CountedNullStream(NullStream):
     __slots__ = '_bw'
@@ -30,15 +32,15 @@ class CountedNullStream(NullStream):
 
 class TestPackStreamingPerformance(TestBigRepoR):
 
+    @skip_on_travis_ci
     def test_pack_writing(self):
         # see how fast we can write a pack from object streams.
         # This will not be fast, as we take time for decompressing the streams as well
         ostream = CountedNullStream()
         pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
 
-        ni = 5000
+        ni = 1000
         count = 0
-        total_size = 0
         st = time()
         for sha in pdb.sha_iter():
             count += 1
@@ -47,17 +49,18 @@ class TestPackStreamingPerformance(TestBigRepoR):
                 break
         #END gather objects for pack-writing
         elapsed = time() - st
-        print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed)
+        print("PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed), file=sys.stderr)
 
         st = time()
         PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
         elapsed = time() - st
         total_kb = ostream.bytes_written() / 1000
-        print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
+        print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb / elapsed), file=sys.stderr)
 
+    @skip_on_travis_ci
     def test_stream_reading(self):
-        raise SkipTest()
+        # raise SkipTest()
         pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
 
         # streaming only, meant for --with-profile runs
@@ -75,5 +78,5 @@ class TestPackStreamingPerformance(TestBigRepoR):
             count += 1
         elapsed = time() - st
         total_kib = total_size / 1000
-        print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed)
+        print("PDB Streaming: Got %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib / elapsed, elapsed, ni / elapsed), file=sys.stderr)
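CountedNullStream (its body is elided by the hunk context above) is what lets test_pack_writing time PackEntity.write_pack without any disk I/O: it discards everything it receives but counts the bytes flowing through. The pattern, sketched here with hypothetical names:

    class CountingSink(object):
        """Write target that discards data but records throughput (sketch)."""

        def __init__(self):
            self._bw = 0  # bytes written so far

        def write(self, data):
            self._bw += len(data)
            return len(data)

        def bytes_written(self):
            return self._bw

Handing sink.write to a pack writer isolates decompression and pack-encoding cost from filesystem speed.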
diff --git a/gitdb/test/performance/test_stream.py b/gitdb/test/performance/test_stream.py
index 010003d..84c9dea 100644
--- a/gitdb/test/performance/test_stream.py
+++ b/gitdb/test/performance/test_stream.py
@@ -3,35 +3,25 @@
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Performance data streaming performance"""
-from lib import TestBigRepoR
-from gitdb.db import *
-from gitdb.base import *
-from gitdb.stream import *
-from gitdb.util import (
-    pool,
-    bin_to_hex
-    )
-from gitdb.typ import str_blob_type
-from gitdb.fun import chunk_size
+from __future__ import print_function
+
+from gitdb.test.performance.lib import TestBigRepoR
+from gitdb.db import LooseObjectDB
+from gitdb import IStream
 
-from async import (
-    IteratorReader,
-    ChannelThreadTask,
-    )
+from gitdb.util import bin_to_hex
+from gitdb.fun import chunk_size
 
-from cStringIO import StringIO
 from time import time
 import os
 import sys
-import stat
-import subprocess
 
-from lib import (
-    TestBigRepoR,
+from gitdb.test.lib import (
     make_memory_file,
-    with_rw_directory
-    )
+    with_rw_directory,
+    skip_on_travis_ci
+)
 
 
 #{ Utilities
@@ -47,22 +37,14 @@ def read_chunked_stream(stream):
     return stream
 
 
-class TestStreamReader(ChannelThreadTask):
-    """Expects input streams and reads them in chunks. It will read one at a time,
-    requireing a queue chunk of size 1"""
-    def __init__(self, *args):
-        super(TestStreamReader, self).__init__(*args)
-        self.fun = read_chunked_stream
-        self.max_chunksize = 1
-
-
 #} END utilities
 
 
 class TestObjDBPerformance(TestBigRepoR):
 
     large_data_size_bytes = 1000*1000*50    # some MiB should do it
     moderate_data_size_bytes = 1000*1000*1  # just 1 MiB
 
+    @skip_on_travis_ci
     @with_rw_directory
     def test_large_data_streaming(self, path):
         ldb = LooseObjectDB(path)
@@ -71,11 +53,11 @@ class TestObjDBPerformance(TestBigRepoR):
         # serial mode
         for randomize in range(2):
             desc = (randomize and 'random ') or ''
-            print >> sys.stderr, "Creating %s data ..." % desc
+            print("Creating %s data ..." % desc, file=sys.stderr)
             st = time()
             size, stream = make_memory_file(self.large_data_size_bytes, randomize)
             elapsed = time() - st
-            print >> sys.stderr, "Done (in %f s)" % elapsed
+            print("Done (in %f s)" % elapsed, file=sys.stderr)
             string_ios.append(stream)
 
             # writing - due to the compression it will seem faster than it is
@@ -88,7 +70,7 @@ class TestObjDBPerformance(TestBigRepoR):
 
             size_kib = size / 1000
-            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
+            print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add), file=sys.stderr)
 
             # reading all at once
             st = time()
@@ -98,7 +80,7 @@ class TestObjDBPerformance(TestBigRepoR):
             stream.seek(0)
             assert shadata == stream.getvalue()
 
-            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
+            print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall), file=sys.stderr)
 
             # reading in chunks of 1 MiB
@@ -115,81 +97,11 @@ class TestObjDBPerformance(TestBigRepoR):
             elapsed_readchunks = time() - st
 
             stream.seek(0)
-            assert ''.join(chunks) == stream.getvalue()
+            assert b''.join(chunks) == stream.getvalue()
 
             cs_kib = cs / 1000
-            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)
 
             # del db file so we keep something to do
             os.remove(db_file)
         # END for each randomization factor
-
-
-        # multi-threaded mode
-        # want two, should be supported by most of todays cpus
-        pool.set_size(2)
-        total_kib = 0
-        nsios = len(string_ios)
-        for stream in string_ios:
-            stream.seek(0)
-            total_kib += len(stream.getvalue()) / 1000
-        # END rewind
-
-        def istream_iter():
-            for stream in string_ios:
-                stream.seek(0)
-                yield IStream(str_blob_type, len(stream.getvalue()), stream)
-            # END for each stream
-        # END util
-
-        # write multiple objects at once, involving concurrent compression
-        reader = IteratorReader(istream_iter())
-        istream_reader = ldb.store_async(reader)
-        istream_reader.task().max_chunksize = 1
-
-        st = time()
-        istreams = istream_reader.read(nsios)
-        assert len(istreams) == nsios
-        elapsed = time() - st
-
-        print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
-
-        # decompress multiple at once, by reading them - chunk size is not important as the stream will not really be decompressed
-        # until its read
-        istream_reader = IteratorReader(iter([ i.binsha for i in istreams ]))
-        ostream_reader = ldb.stream_async(istream_reader)
-
-        chunk_task = TestStreamReader(ostream_reader, "chunker", None)
-        output_reader = pool.add_task(chunk_task)
-        output_reader.task().max_chunksize = 1
-
-        st = time()
-        assert len(output_reader.read(nsios)) == nsios
-        elapsed = time() - st
-
-        print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
-
-        # store the files, and read them back. For the reading, we use a task
-        # as well which is chunked into one item per task. Reading all will
-        # very quickly result in two threads handling two bytestreams of
-        # chained compression/decompression streams
-        reader = IteratorReader(istream_iter())
-        istream_reader = ldb.store_async(reader)
-        istream_reader.task().max_chunksize = 1
-
-        istream_to_sha = lambda items: [ i.binsha for i in items ]
-        istream_reader.set_post_cb(istream_to_sha)
-
-        ostream_reader = ldb.stream_async(istream_reader)
-
-        chunk_task = TestStreamReader(ostream_reader, "chunker", None)
-        output_reader = pool.add_task(chunk_task)
-        output_reader.max_chunksize = 1
-
-        st = time()
-        assert len(output_reader.read(nsios)) == nsios
-        elapsed = time() - st
-
-        print >> sys.stderr, "Threads(%i): Compressed and decompressed and read %i KiB of data in loose odb in %f s ( %f Combined KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
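For orientation, here is the write/read cycle that test_large_data_streaming above measures at scale, reduced to a minimal sketch against the same LooseObjectDB/IStream API imported in the diff; the b"blob" type token and the throwaway directory are assumptions, not part of this change:

    from io import BytesIO
    from tempfile import mkdtemp

    from gitdb import IStream
    from gitdb.db import LooseObjectDB

    ldb = LooseObjectDB(mkdtemp())  # throwaway loose object database
    data = b"hello gitdb" * 100

    # store: compress the stream into a loose object; binsha is filled in
    istream = IStream(b"blob", len(data), BytesIO(data))
    ldb.store(istream)

    # stream: read the object back and check the payload round-tripped
    ostream = ldb.stream(istream.binsha)
    assert ostream.read() == data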
