diff options
Diffstat (limited to 'gitdb/test')
| -rw-r--r-- | gitdb/test/__init__.py | 12 | ||||
| -rw-r--r-- | gitdb/test/db/lib.py | 149 | ||||
| -rw-r--r-- | gitdb/test/db/test_git.py | 33 | ||||
| -rw-r--r-- | gitdb/test/db/test_loose.py | 21 | ||||
| -rw-r--r-- | gitdb/test/db/test_mem.py | 23 | ||||
| -rw-r--r-- | gitdb/test/db/test_pack.py | 41 | ||||
| -rw-r--r-- | gitdb/test/db/test_ref.py | 35 | ||||
| -rw-r--r-- | gitdb/test/lib.py | 67 | ||||
| -rw-r--r-- | gitdb/test/performance/__init__.py | 1 | ||||
| -rw-r--r-- | gitdb/test/performance/lib.py | 31 | ||||
| -rw-r--r-- | gitdb/test/performance/test_pack.py | 28 | ||||
| -rw-r--r-- | gitdb/test/performance/test_pack_streaming.py | 21 | ||||
| -rw-r--r-- | gitdb/test/performance/test_stream.py | 126 | ||||
| -rw-r--r-- | gitdb/test/test_base.py | 40 | ||||
| -rw-r--r-- | gitdb/test/test_example.py | 46 | ||||
| -rw-r--r-- | gitdb/test/test_pack.py | 123 | ||||
| -rw-r--r-- | gitdb/test/test_stream.py | 101 | ||||
| -rw-r--r-- | gitdb/test/test_util.py | 57 |
18 files changed, 392 insertions, 563 deletions
diff --git a/gitdb/test/__init__.py b/gitdb/test/__init__.py index f805944..8a681e4 100644 --- a/gitdb/test/__init__.py +++ b/gitdb/test/__init__.py @@ -2,15 +2,3 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php - -import gitdb.util - -#{ Initialization -def _init_pool(): - """Assure the pool is actually threaded""" - size = 2 - print "Setting ThreadPool to %i" % size - gitdb.util.pool.set_size(size) - - -#} END initialization diff --git a/gitdb/test/db/lib.py b/gitdb/test/db/lib.py index 62614ee..af6d9e0 100644 --- a/gitdb/test/db/lib.py +++ b/gitdb/test/db/lib.py @@ -6,63 +6,66 @@ from gitdb.test.lib import ( with_rw_directory, with_packs_rw, - ZippedStoreShaWriter, fixture_path, TestBase - ) +) -from gitdb.stream import Sha1Writer +from gitdb.stream import ( + Sha1Writer, + ZippedStoreShaWriter +) from gitdb.base import ( - IStream, - OStream, - OInfo - ) - + IStream, + OStream, + OInfo +) + from gitdb.exc import BadObject from gitdb.typ import str_blob_type +from gitdb.utils.compat import xrange + +from io import BytesIO -from async import IteratorReader -from cStringIO import StringIO from struct import pack __all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path') - + class TestDBBase(TestBase): """Base class providing testing routines on databases""" - + # data - two_lines = "1234\nhello world" + two_lines = b'1234\nhello world' all_data = (two_lines, ) - + def _assert_object_writing_simple(self, db): # write a bunch of objects and query their streams and info null_objs = db.size() ni = 250 for i in xrange(ni): data = pack(">L", i) - istream = IStream(str_blob_type, len(data), StringIO(data)) + istream = IStream(str_blob_type, len(data), BytesIO(data)) new_istream = db.store(istream) assert new_istream is istream assert db.has_object(istream.binsha) - + info = db.info(istream.binsha) assert isinstance(info, OInfo) assert info.type == istream.type and 
info.size == istream.size - + stream = db.stream(istream.binsha) assert isinstance(stream, OStream) assert stream.binsha == info.binsha and stream.type == info.type assert stream.read() == data # END for each item - + assert db.size() == null_objs + ni shas = list(db.sha_iter()) assert len(shas) == db.size() assert len(shas[0]) == 20 - - + + def _assert_object_writing(self, db): """General tests to verify object writing, compatible to ObjectDBW **Note:** requires write access to the database""" @@ -76,25 +79,24 @@ class TestDBBase(TestBase): ostream = ostreamcls() assert isinstance(ostream, Sha1Writer) # END create ostream - + prev_ostream = db.set_ostream(ostream) - assert type(prev_ostream) in ostreams or prev_ostream in ostreams - - istream = IStream(str_blob_type, len(data), StringIO(data)) - + assert type(prev_ostream) in ostreams or prev_ostream in ostreams + istream = IStream(str_blob_type, len(data), BytesIO(data)) + # store returns same istream instance, with new sha set my_istream = db.store(istream) sha = istream.binsha assert my_istream is istream assert db.has_object(sha) != dry_run - assert len(sha) == 20 - + assert len(sha) == 20 + # verify data - the slow way, we want to run code if not dry_run: info = db.info(sha) assert str_blob_type == info.type assert info.size == len(data) - + ostream = db.stream(sha) assert ostream.read() == data assert ostream.type == str_blob_type @@ -102,107 +104,26 @@ class TestDBBase(TestBase): else: self.failUnlessRaises(BadObject, db.info, sha) self.failUnlessRaises(BadObject, db.stream, sha) - + # DIRECT STREAM COPY # our data hase been written in object format to the StringIO # we pasesd as output stream. No physical database representation # was created. 
- # Test direct stream copy of object streams, the result must be + # Test direct stream copy of object streams, the result must be # identical to what we fed in ostream.seek(0) istream.stream = ostream assert istream.binsha is not None prev_sha = istream.binsha - + db.set_ostream(ZippedStoreShaWriter()) db.store(istream) assert istream.binsha == prev_sha new_ostream = db.ostream() - + # note: only works as long our store write uses the same compression # level, which is zip_best assert ostream.getvalue() == new_ostream.getvalue() # END for each data set # END for each dry_run mode - - def _assert_object_writing_async(self, db): - """Test generic object writing using asynchronous access""" - ni = 5000 - def istream_generator(offset=0, ni=ni): - for data_src in xrange(ni): - data = str(data_src + offset) - yield IStream(str_blob_type, len(data), StringIO(data)) - # END for each item - # END generator utility - - # for now, we are very trusty here as we expect it to work if it worked - # in the single-stream case - - # write objects - reader = IteratorReader(istream_generator()) - istream_reader = db.store_async(reader) - istreams = istream_reader.read() # read all - assert istream_reader.task().error() is None - assert len(istreams) == ni - - for stream in istreams: - assert stream.error is None - assert len(stream.binsha) == 20 - assert isinstance(stream, IStream) - # END assert each stream - - # test has-object-async - we must have all previously added ones - reader = IteratorReader( istream.binsha for istream in istreams ) - hasobject_reader = db.has_object_async(reader) - count = 0 - for sha, has_object in hasobject_reader: - assert has_object - count += 1 - # END for each sha - assert count == ni - - # read the objects we have just written - reader = IteratorReader( istream.binsha for istream in istreams ) - ostream_reader = db.stream_async(reader) - - # read items individually to prevent hitting possible sys-limits - count = 0 - for ostream in ostream_reader: 
- assert isinstance(ostream, OStream) - count += 1 - # END for each ostream - assert ostream_reader.task().error() is None - assert count == ni - - # get info about our items - reader = IteratorReader( istream.binsha for istream in istreams ) - info_reader = db.info_async(reader) - - count = 0 - for oinfo in info_reader: - assert isinstance(oinfo, OInfo) - count += 1 - # END for each oinfo instance - assert count == ni - - - # combined read-write using a converter - # add 2500 items, and obtain their output streams - nni = 2500 - reader = IteratorReader(istream_generator(offset=ni, ni=nni)) - istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ] - - istream_reader = db.store_async(reader) - istream_reader.set_post_cb(istream_to_sha) - - ostream_reader = db.stream_async(istream_reader) - - count = 0 - # read it individually, otherwise we might run into the ulimit - for ostream in ostream_reader: - assert isinstance(ostream, OStream) - count += 1 - # END for each ostream - assert count == nni - - + diff --git a/gitdb/test/db/test_git.py b/gitdb/test/db/test_git.py index 1ef577a..e141c2b 100644 --- a/gitdb/test/db/test_git.py +++ b/gitdb/test/db/test_git.py @@ -2,46 +2,51 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from lib import * +from gitdb.test.db.lib import ( + TestDBBase, + fixture_path, + with_rw_directory +) from gitdb.exc import BadObject from gitdb.db import GitDB from gitdb.base import OStream, OInfo from gitdb.util import hex_to_bin, bin_to_hex - + class TestGitDB(TestDBBase): - + def test_reading(self): gdb = GitDB(fixture_path('../../../.git/objects')) - + # we have packs and loose objects, alternates doesn't necessarily exist assert 1 < len(gdb.databases()) < 4 - + # access should be possible gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976") assert isinstance(gdb.info(gitdb_sha), OInfo) assert 
isinstance(gdb.stream(gitdb_sha), OStream) - assert gdb.size() > 200 + ni = 50 + assert gdb.size() >= ni sha_list = list(gdb.sha_iter()) assert len(sha_list) == gdb.size() - - - # This is actually a test for compound functionality, but it doesn't + sha_list = sha_list[:ni] # speed up tests ... + + + # This is actually a test for compound functionality, but it doesn't # have a separate test module # test partial shas # this one as uneven and quite short assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc") - + # mix even/uneven hexshas for i, binsha in enumerate(sha_list): assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha # END for each sha - + self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000") - + @with_rw_directory def test_writing(self, path): gdb = GitDB(path) - + # its possible to write objects self._assert_object_writing(gdb) - self._assert_object_writing_async(gdb) diff --git a/gitdb/test/db/test_loose.py b/gitdb/test/db/test_loose.py index d7e1d01..1d6af9c 100644 --- a/gitdb/test/db/test_loose.py +++ b/gitdb/test/db/test_loose.py @@ -2,33 +2,34 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from lib import * +from gitdb.test.db.lib import ( + TestDBBase, + with_rw_directory +) from gitdb.db import LooseObjectDB from gitdb.exc import BadObject from gitdb.util import bin_to_hex - + class TestLooseDB(TestDBBase): - + @with_rw_directory def test_basics(self, path): ldb = LooseObjectDB(path) - + # write data self._assert_object_writing(ldb) - self._assert_object_writing_async(ldb) - + # verify sha iteration and size shas = list(ldb.sha_iter()) assert shas and len(shas[0]) == 20 - + assert len(shas) == ldb.size() - + # verify find short object long_sha = bin_to_hex(shas[-1]) for short_sha in (long_sha[:20], long_sha[:5]): assert 
bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha # END for each sha - + self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000') # raises if no object could be foudn - diff --git a/gitdb/test/db/test_mem.py b/gitdb/test/db/test_mem.py index df428e2..97f7217 100644 --- a/gitdb/test/db/test_mem.py +++ b/gitdb/test/db/test_mem.py @@ -2,29 +2,32 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from lib import * +from gitdb.test.db.lib import ( + TestDBBase, + with_rw_directory +) from gitdb.db import ( - MemoryDB, - LooseObjectDB - ) - + MemoryDB, + LooseObjectDB +) + class TestMemoryDB(TestDBBase): - + @with_rw_directory def test_writing(self, path): mdb = MemoryDB() - + # write data self._assert_object_writing_simple(mdb) - + # test stream copy ldb = LooseObjectDB(path) assert ldb.size() == 0 num_streams_copied = mdb.stream_copy(mdb.sha_iter(), ldb) assert num_streams_copied == mdb.size() - + assert ldb.size() == mdb.size() for sha in mdb.sha_iter(): assert ldb.has_object(sha) - assert ldb.stream(sha).read() == mdb.stream(sha).read() + assert ldb.stream(sha).read() == mdb.stream(sha).read() # END verify objects where copied and are equal diff --git a/gitdb/test/db/test_pack.py b/gitdb/test/db/test_pack.py index f4cb5bb..963a71a 100644 --- a/gitdb/test/db/test_pack.py +++ b/gitdb/test/db/test_pack.py @@ -2,9 +2,12 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from lib import * +from gitdb.test.db.lib import ( + TestDBBase, + with_rw_directory, + with_packs_rw +) from gitdb.db import PackedDB -from gitdb.test.lib import fixture_path from gitdb.exc import BadObject, AmbiguousObjectName @@ -12,45 +15,45 @@ import os import random class TestPackDB(TestDBBase): - + @with_rw_directory @with_packs_rw def test_writing(self, path): pdb = PackedDB(path) - + # on demand, we 
init our pack cache num_packs = len(pdb.entities()) assert pdb._st_mtime != 0 - - # test pack directory changed: + + # test pack directory changed: # packs removed - rename a file, should affect the glob pack_path = pdb.entities()[0].pack().path() new_pack_path = pack_path + "renamed" os.rename(pack_path, new_pack_path) - + pdb.update_cache(force=True) assert len(pdb.entities()) == num_packs - 1 - + # packs added os.rename(new_pack_path, pack_path) pdb.update_cache(force=True) assert len(pdb.entities()) == num_packs - + # bang on the cache # access the Entities directly, as there is no iteration interface # yet ( or required for now ) sha_list = list(pdb.sha_iter()) assert len(sha_list) == pdb.size() - + # hit all packs in random order random.shuffle(sha_list) - + for sha in sha_list: - info = pdb.info(sha) - stream = pdb.stream(sha) + pdb.info(sha) + pdb.stream(sha) # END for each sha to query - - + + # test short finding - be a bit more brutal here max_bytes = 19 min_bytes = 2 @@ -64,10 +67,10 @@ class TestPackDB(TestDBBase): pass # valid, we can have short objects # END exception handling # END for each sha to find - + # we should have at least one ambiguous, considering the small sizes - # but in our pack, there is no ambigious ... + # but in our pack, there is no ambigious ... 
# assert num_ambiguous - + # non-existing - self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0", 4) + self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, b'\0\0', 4) diff --git a/gitdb/test/db/test_ref.py b/gitdb/test/db/test_ref.py index 1637bff..db93082 100644 --- a/gitdb/test/db/test_ref.py +++ b/gitdb/test/db/test_ref.py @@ -2,59 +2,58 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from lib import * +from gitdb.test.db.lib import ( + TestDBBase, + with_rw_directory, + fixture_path +) from gitdb.db import ReferenceDB from gitdb.util import ( - NULL_BIN_SHA, - hex_to_bin - ) + NULL_BIN_SHA, + hex_to_bin +) import os - + class TestReferenceDB(TestDBBase): - + def make_alt_file(self, alt_path, alt_list): """Create an alternates file which contains the given alternates. The list can be empty""" alt_file = open(alt_path, "wb") for alt in alt_list: - alt_file.write(alt + "\n") + alt_file.write(alt.encode("utf-8") + "\n".encode("ascii")) alt_file.close() - + @with_rw_directory def test_writing(self, path): - NULL_BIN_SHA = '\0' * 20 - alt_path = os.path.join(path, 'alternates') rdb = ReferenceDB(alt_path) assert len(rdb.databases()) == 0 assert rdb.size() == 0 assert len(list(rdb.sha_iter())) == 0 - + # try empty, non-existing assert not rdb.has_object(NULL_BIN_SHA) - - + # setup alternate file # add two, one is invalid own_repo_path = fixture_path('../../../.git/objects') # use own repo self.make_alt_file(alt_path, [own_repo_path, "invalid/path"]) rdb.update_cache() assert len(rdb.databases()) == 1 - + # we should now find a default revision of ours gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976") assert rdb.has_object(gitdb_sha) - + # remove valid self.make_alt_file(alt_path, ["just/one/invalid/path"]) rdb.update_cache() assert len(rdb.databases()) == 0 - + # add valid self.make_alt_file(alt_path, [own_repo_path]) rdb.update_cache() 
assert len(rdb.databases()) == 1 - - diff --git a/gitdb/test/lib.py b/gitdb/test/lib.py index ac8473a..d09b1cb 100644 --- a/gitdb/test/lib.py +++ b/gitdb/test/lib.py @@ -3,20 +3,14 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Utilities used in ODB testing""" -from gitdb import ( - OStream, - ) -from gitdb.stream import ( - Sha1Writer, - ZippedStoreShaWriter - ) - -from gitdb.util import zlib +from gitdb import OStream +from gitdb.utils.compat import xrange import sys import random from array import array -from cStringIO import StringIO + +from io import BytesIO import glob import unittest @@ -24,20 +18,36 @@ import tempfile import shutil import os import gc +from functools import wraps #{ Bases class TestBase(unittest.TestCase): """Base class for all tests""" - + #} END bases #{ Decorators +def skip_on_travis_ci(func): + """All tests decorated with this one will raise SkipTest when run on travis ci. + Use it to workaround difficult to solve issues + NOTE: copied from bcore (https://github.com/Byron/bcore)""" + @wraps(func) + def wrapper(self, *args, **kwargs): + if 'TRAVIS' in os.environ: + import nose + raise nose.SkipTest("Cannot run on travis-ci") + # end check for travis ci + return func(self, *args, **kwargs) + # end wrapper + return wrapper + + def with_rw_directory(func): - """Create a temporary directory which can be written to, remove it if the + """Create a temporary directory which can be written to, remove it if the test suceeds, but leave it otherwise to aid additional debugging""" def wrapper(self): path = tempfile.mktemp(prefix=func.__name__) @@ -47,12 +57,12 @@ def with_rw_directory(func): try: return func(self, path) except Exception: - print >> sys.stderr, "Test %s.%s failed, output is at %r" % (type(self).__name__, func.__name__, path) + sys.stderr.write("Test %s.%s failed, output is at %r\n" % (type(self).__name__, func.__name__, path)) keep = True raise 
finally: # Need to collect here to be sure all handles have been closed. It appears - # a windows-only issue. In fact things should be deleted, as well as + # a windows-only issue. In fact things should be deleted, as well as # memory maps closed, once objects go out of scope. For some reason # though this is not the case here unless we collect explicitly. if not keep: @@ -60,20 +70,20 @@ def with_rw_directory(func): shutil.rmtree(path) # END handle exception # END wrapper - + wrapper.__name__ = func.__name__ return wrapper def with_packs_rw(func): - """Function that provides a path into which the packs for testing should be + """Function that provides a path into which the packs for testing should be copied. Will pass on the path to the actual function afterwards""" def wrapper(self, path): src_pack_glob = fixture_path('packs/*') copy_files_globbed(src_pack_glob, path, hard_link_ok=True) return func(self, path) # END wrapper - + wrapper.__name__ = func.__name__ return wrapper @@ -86,10 +96,10 @@ def fixture_path(relapath=''): :param relapath: relative path into the fixtures directory, or '' to obtain the fixture directory itself""" return os.path.join(os.path.dirname(__file__), 'fixtures', relapath) - + def copy_files_globbed(source_glob, target_dir, hard_link_ok=False): """Copy all files found according to the given source glob into the target directory - :param hard_link_ok: if True, hard links will be created if possible. Otherwise + :param hard_link_ok: if True, hard links will be created if possible. 
Otherwise the files will be copied""" for src_file in glob.glob(source_glob): if hard_link_ok and hasattr(os, 'link'): @@ -103,12 +113,12 @@ def copy_files_globbed(source_glob, target_dir, hard_link_ok=False): shutil.copy(src_file, target_dir) # END try hard link # END for each file to copy - + def make_bytes(size_in_bytes, randomize=False): """:return: string with given size in bytes :param randomize: try to produce a very random stream""" - actual_size = size_in_bytes / 4 + actual_size = size_in_bytes // 4 producer = xrange(actual_size) if randomize: producer = list(producer) @@ -120,13 +130,13 @@ def make_bytes(size_in_bytes, randomize=False): def make_object(type, data): """:return: bytes resembling an uncompressed object""" odata = "blob %i\0" % len(data) - return odata + data - + return odata.encode("ascii") + data + def make_memory_file(size_in_bytes, randomize=False): """:return: tuple(size_of_stream, stream) :param randomize: try to produce a very random stream""" d = make_bytes(size_in_bytes, randomize) - return len(d), StringIO(d) + return len(d), BytesIO(d) #} END routines @@ -137,14 +147,14 @@ class DummyStream(object): self.was_read = False self.bytes = 0 self.closed = False - + def read(self, size): self.was_read = True self.bytes = size - + def close(self): self.closed = True - + def _assert(self): assert self.was_read @@ -153,10 +163,9 @@ class DeriveTest(OStream): def __init__(self, sha, type, size, stream, *args, **kwargs): self.myarg = kwargs.pop('myarg') self.args = args - + def _assert(self): assert self.args assert self.myarg #} END stream utilitiess - diff --git a/gitdb/test/performance/__init__.py b/gitdb/test/performance/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/gitdb/test/performance/__init__.py @@ -0,0 +1 @@ + diff --git a/gitdb/test/performance/lib.py b/gitdb/test/performance/lib.py index 3563fcf..ec45cf3 100644 --- a/gitdb/test/performance/lib.py +++ b/gitdb/test/performance/lib.py @@ -4,9 +4,8 @@ # 
the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains library functions""" import os -from gitdb.test.lib import * -import shutil -import tempfile +import logging +from gitdb.test.lib import TestBase #{ Invvariants @@ -14,17 +13,6 @@ k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE" #} END invariants -#{ Utilities -def resolve_or_fail(env_var): - """:return: resolved environment variable or raise EnvironmentError""" - try: - return os.environ[env_var] - except KeyError: - raise EnvironmentError("Please set the %r envrionment variable and retry" % env_var) - # END exception handling - -#} END utilities - #{ Base Classes @@ -41,14 +29,19 @@ class TestBigRepoR(TestBase): head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5' #} END invariants - @classmethod - def setUpAll(cls): + def setUp(self): try: - super(TestBigRepoR, cls).setUpAll() + super(TestBigRepoR, self).setUp() except AttributeError: pass - cls.gitrepopath = resolve_or_fail(k_env_git_repo) - assert cls.gitrepopath.endswith('.git') + + self.gitrepopath = os.environ.get(k_env_git_repo) + if not self.gitrepopath: + logging.info("You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository") + ospd = os.path.dirname + self.gitrepopath = os.path.join(ospd(ospd(ospd(ospd(__file__)))), '.git') + # end assure gitrepo is set + assert self.gitrepopath.endswith('.git') #} END base classes diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py index 63856e2..db3b48d 100644 --- a/gitdb/test/performance/test_pack.py +++ b/gitdb/test/performance/test_pack.py @@ -3,22 +3,24 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Performance tests for object store""" -from lib import ( +from __future__ import print_function + +from gitdb.test.performance.lib import ( TestBigRepoR - ) +) from gitdb.exc import UnsupportedOperation from 
gitdb.db.pack import PackedDB +from gitdb.utils.compat import xrange +from gitdb.test.lib import skip_on_travis_ci import sys import os from time import time -import random - -from nose import SkipTest class TestPackedDBPerformance(TestBigRepoR): - + + @skip_on_travis_ci def test_pack_random_access(self): pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) @@ -27,7 +29,7 @@ class TestPackedDBPerformance(TestBigRepoR): sha_list = list(pdb.sha_iter()) elapsed = time() - st ns = len(sha_list) - print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed) + print("PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed), file=sys.stderr) # sha lookup: best-case and worst case access pdb_pack_info = pdb._pack_info @@ -41,7 +43,7 @@ class TestPackedDBPerformance(TestBigRepoR): # discard cache del(pdb._entities) pdb.entities() - print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed) + print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr) # END for each random mode # query info and streams only @@ -51,7 +53,7 @@ class TestPackedDBPerformance(TestBigRepoR): for sha in sha_list[:max_items]: pdb_fun(sha) elapsed = time() - st - print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed) + print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr) # END for each function # retrieve stream and read all @@ -65,13 +67,13 @@ class TestPackedDBPerformance(TestBigRepoR): total_size += stream.size elapsed = time() - st total_kib = total_size / 1000 - print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f 
streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed) + print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr) + @skip_on_travis_ci def test_correctness(self): - raise SkipTest("Takes too long, enable it if you change the algorithm and want to be sure you decode packs correctly") pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) # disabled for now as it used to work perfectly, checking big repositories takes a long time - print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)" + print("Endurance run: verify streaming of objects (crc and sha)", file=sys.stderr) for crc in range(2): count = 0 st = time() @@ -88,6 +90,6 @@ class TestPackedDBPerformance(TestBigRepoR): # END for each index # END for each entity elapsed = time() - st - print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed) + print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed), file=sys.stderr) # END for each verify mode diff --git a/gitdb/test/performance/test_pack_streaming.py b/gitdb/test/performance/test_pack_streaming.py index c66e60c..fe160ea 100644 --- a/gitdb/test/performance/test_pack_streaming.py +++ b/gitdb/test/performance/test_pack_streaming.py @@ -3,18 +3,20 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Specific test for pack streams only""" -from lib import ( +from __future__ import print_function + +from gitdb.test.performance.lib import ( TestBigRepoR - ) +) from gitdb.db.pack import PackedDB from gitdb.stream import NullStream from gitdb.pack import PackEntity +from gitdb.test.lib import skip_on_travis_ci import os import sys from time import time -from nose 
import SkipTest class CountedNullStream(NullStream): __slots__ = '_bw' @@ -30,15 +32,15 @@ class CountedNullStream(NullStream): class TestPackStreamingPerformance(TestBigRepoR): + @skip_on_travis_ci def test_pack_writing(self): # see how fast we can write a pack from object streams. # This will not be fast, as we take time for decompressing the streams as well ostream = CountedNullStream() pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) - ni = 5000 + ni = 1000 count = 0 - total_size = 0 st = time() for sha in pdb.sha_iter(): count += 1 @@ -47,17 +49,18 @@ class TestPackStreamingPerformance(TestBigRepoR): break #END gather objects for pack-writing elapsed = time() - st - print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed) + print("PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed), file=sys.stderr) st = time() PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni) elapsed = time() - st total_kb = ostream.bytes_written() / 1000 - print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed) + print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed), file=sys.stderr) + @skip_on_travis_ci def test_stream_reading(self): - raise SkipTest() + # raise SkipTest() pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) # streaming only, meant for --with-profile runs @@ -75,5 +78,5 @@ class TestPackStreamingPerformance(TestBigRepoR): count += 1 elapsed = time() - st total_kib = total_size / 1000 - print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed) + print("PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f 
KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed), file=sys.stderr) diff --git a/gitdb/test/performance/test_stream.py b/gitdb/test/performance/test_stream.py index 010003d..84c9dea 100644 --- a/gitdb/test/performance/test_stream.py +++ b/gitdb/test/performance/test_stream.py @@ -3,35 +3,25 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Performance data streaming performance""" -from lib import TestBigRepoR -from gitdb.db import * -from gitdb.base import * -from gitdb.stream import * -from gitdb.util import ( - pool, - bin_to_hex - ) -from gitdb.typ import str_blob_type -from gitdb.fun import chunk_size +from __future__ import print_function + +from gitdb.test.performance.lib import TestBigRepoR +from gitdb.db import LooseObjectDB +from gitdb import IStream -from async import ( - IteratorReader, - ChannelThreadTask, - ) +from gitdb.util import bin_to_hex +from gitdb.fun import chunk_size -from cStringIO import StringIO from time import time import os import sys -import stat -import subprocess -from lib import ( - TestBigRepoR, +from gitdb.test.lib import ( make_memory_file, - with_rw_directory - ) + with_rw_directory, + skip_on_travis_ci +) #{ Utilities @@ -47,22 +37,14 @@ def read_chunked_stream(stream): return stream -class TestStreamReader(ChannelThreadTask): - """Expects input streams and reads them in chunks. 
It will read one at a time, - requireing a queue chunk of size 1""" - def __init__(self, *args): - super(TestStreamReader, self).__init__(*args) - self.fun = read_chunked_stream - self.max_chunksize = 1 - - #} END utilities class TestObjDBPerformance(TestBigRepoR): large_data_size_bytes = 1000*1000*50 # some MiB should do it moderate_data_size_bytes = 1000*1000*1 # just 1 MiB - + + @skip_on_travis_ci @with_rw_directory def test_large_data_streaming(self, path): ldb = LooseObjectDB(path) @@ -71,11 +53,11 @@ class TestObjDBPerformance(TestBigRepoR): # serial mode for randomize in range(2): desc = (randomize and 'random ') or '' - print >> sys.stderr, "Creating %s data ..." % desc + print("Creating %s data ..." % desc, file=sys.stderr) st = time() size, stream = make_memory_file(self.large_data_size_bytes, randomize) elapsed = time() - st - print >> sys.stderr, "Done (in %f s)" % elapsed + print("Done (in %f s)" % elapsed, file=sys.stderr) string_ios.append(stream) # writing - due to the compression it will seem faster than it is @@ -88,7 +70,7 @@ class TestObjDBPerformance(TestBigRepoR): size_kib = size / 1000 - print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) + print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add), file=sys.stderr) # reading all at once st = time() @@ -98,7 +80,7 @@ class TestObjDBPerformance(TestBigRepoR): stream.seek(0) assert shadata == stream.getvalue() - print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall) + print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall), file=sys.stderr) # reading in chunks of 1 MiB @@ -115,81 
+97,11 @@ class TestObjDBPerformance(TestBigRepoR): elapsed_readchunks = time() - st stream.seek(0) - assert ''.join(chunks) == stream.getvalue() + assert b''.join(chunks) == stream.getvalue() cs_kib = cs / 1000 - print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks) + print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr) # del db file so we keep something to do os.remove(db_file) # END for each randomization factor - - - # multi-threaded mode - # want two, should be supported by most of todays cpus - pool.set_size(2) - total_kib = 0 - nsios = len(string_ios) - for stream in string_ios: - stream.seek(0) - total_kib += len(stream.getvalue()) / 1000 - # END rewind - - def istream_iter(): - for stream in string_ios: - stream.seek(0) - yield IStream(str_blob_type, len(stream.getvalue()), stream) - # END for each stream - # END util - - # write multiple objects at once, involving concurrent compression - reader = IteratorReader(istream_iter()) - istream_reader = ldb.store_async(reader) - istream_reader.task().max_chunksize = 1 - - st = time() - istreams = istream_reader.read(nsios) - assert len(istreams) == nsios - elapsed = time() - st - - print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed) - - # decompress multiple at once, by reading them - # chunk size is not important as the stream will not really be decompressed - - # until its read - istream_reader = IteratorReader(iter([ i.binsha for i in istreams ])) - ostream_reader = ldb.stream_async(istream_reader) - - chunk_task = TestStreamReader(ostream_reader, "chunker", None) - output_reader = pool.add_task(chunk_task) - output_reader.task().max_chunksize = 
1 - - st = time() - assert len(output_reader.read(nsios)) == nsios - elapsed = time() - st - - print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed) - - # store the files, and read them back. For the reading, we use a task - # as well which is chunked into one item per task. Reading all will - # very quickly result in two threads handling two bytestreams of - # chained compression/decompression streams - reader = IteratorReader(istream_iter()) - istream_reader = ldb.store_async(reader) - istream_reader.task().max_chunksize = 1 - - istream_to_sha = lambda items: [ i.binsha for i in items ] - istream_reader.set_post_cb(istream_to_sha) - - ostream_reader = ldb.stream_async(istream_reader) - - chunk_task = TestStreamReader(ostream_reader, "chunker", None) - output_reader = pool.add_task(chunk_task) - output_reader.max_chunksize = 1 - - st = time() - assert len(output_reader.read(nsios)) == nsios - elapsed = time() - st - - print >> sys.stderr, "Threads(%i): Compressed and decompressed and read %i KiB of data in loose odb in %f s ( %f Combined KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed) diff --git a/gitdb/test/test_base.py b/gitdb/test/test_base.py index d4ce428..578c29f 100644 --- a/gitdb/test/test_base.py +++ b/gitdb/test/test_base.py @@ -3,13 +3,21 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Test for object db""" -from lib import ( +from gitdb.test.lib import ( TestBase, DummyStream, - DeriveTest, + DeriveTest, ) -from gitdb import * +from gitdb import ( + OInfo, + OPackInfo, + ODeltaPackInfo, + OStream, + OPackStream, + ODeltaPackStream, + IStream +) from gitdb.util import ( NULL_BIN_SHA ) @@ -20,33 +28,33 @@ from gitdb.typ import ( class TestBaseTypes(TestBase): - + def test_streams(self): # test info sha = NULL_BIN_SHA s = 20 blob_id = 3 - + info = 
OInfo(sha, str_blob_type, s) assert info.binsha == sha assert info.type == str_blob_type assert info.type_id == blob_id assert info.size == s - + # test pack info # provides type_id pinfo = OPackInfo(0, blob_id, s) assert pinfo.type == str_blob_type assert pinfo.type_id == blob_id assert pinfo.pack_offset == 0 - + dpinfo = ODeltaPackInfo(0, blob_id, s, sha) assert dpinfo.type == str_blob_type assert dpinfo.type_id == blob_id assert dpinfo.delta_info == sha assert dpinfo.pack_offset == 0 - - + + # test ostream stream = DummyStream() ostream = OStream(*(info + (stream, ))) @@ -56,33 +64,33 @@ class TestBaseTypes(TestBase): assert stream.bytes == 15 ostream.read(20) assert stream.bytes == 20 - + # test packstream postream = OPackStream(*(pinfo + (stream, ))) assert postream.stream is stream postream.read(10) stream._assert() assert stream.bytes == 10 - + # test deltapackstream dpostream = ODeltaPackStream(*(dpinfo + (stream, ))) dpostream.stream is stream dpostream.read(5) stream._assert() assert stream.bytes == 5 - + # derive with own args DeriveTest(sha, str_blob_type, s, stream, 'mine',myarg = 3)._assert() - + # test istream istream = IStream(str_blob_type, s, stream) assert istream.binsha == None istream.binsha = sha assert istream.binsha == sha - + assert len(istream.binsha) == 20 assert len(istream.hexsha) == 40 - + assert istream.size == s istream.size = s * 2 istream.size == s * 2 @@ -92,7 +100,7 @@ class TestBaseTypes(TestBase): assert istream.stream is stream istream.stream = None assert istream.stream is None - + assert istream.error is None istream.error = Exception() assert isinstance(istream.error, Exception) diff --git a/gitdb/test/test_example.py b/gitdb/test/test_example.py index 611ae42..aa43a09 100644 --- a/gitdb/test/test_example.py +++ b/gitdb/test/test_example.py @@ -3,25 +3,25 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module with examples from the tutorial 
section of the docs""" -from lib import * +from gitdb.test.lib import ( + TestBase, + fixture_path +) from gitdb import IStream from gitdb.db import LooseObjectDB -from gitdb.util import pool - -from cStringIO import StringIO -from async import IteratorReader - +from io import BytesIO + class TestExamples(TestBase): - + def test_base(self): ldb = LooseObjectDB(fixture_path("../../../.git/objects")) - + for sha1 in ldb.sha_iter(): oinfo = ldb.info(sha1) ostream = ldb.stream(sha1) assert oinfo[:3] == ostream[:3] - + assert len(ostream.read()) == ostream.size assert ldb.has_object(oinfo.binsha) # END for each sha in database @@ -32,33 +32,13 @@ class TestExamples(TestBase): except UnboundLocalError: pass # END ignore exception if there are no loose objects - - data = "my data" - istream = IStream("blob", len(data), StringIO(data)) - + + data = "my data".encode("ascii") + istream = IStream("blob", len(data), BytesIO(data)) + # the object does not yet have a sha assert istream.binsha is None ldb.store(istream) # now the sha is set assert len(istream.binsha) == 20 assert ldb.has_object(istream.binsha) - - - # async operation - # Create a reader from an iterator - reader = IteratorReader(ldb.sha_iter()) - - # get reader for object streams - info_reader = ldb.stream_async(reader) - - # read one - info = info_reader.read(1)[0] - - # read all the rest until depletion - ostreams = info_reader.read() - - # set the pool to use two threads - pool.set_size(2) - - # synchronize the mode of operation - pool.set_size(0) diff --git a/gitdb/test/test_pack.py b/gitdb/test/test_pack.py index 779155a..3ab2fec 100644 --- a/gitdb/test/test_pack.py +++ b/gitdb/test/test_pack.py @@ -3,33 +3,38 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Test everything about packs reading and writing""" -from lib import ( - TestBase, - with_rw_directory, - with_packs_rw, - fixture_path - ) +from gitdb.test.lib import ( + 
TestBase, + with_rw_directory, + fixture_path +) + from gitdb.stream import DeltaApplyReader from gitdb.pack import ( - PackEntity, - PackIndexFile, - PackFile - ) + PackEntity, + PackIndexFile, + PackFile +) from gitdb.base import ( - OInfo, - OStream, - ) + OInfo, + OStream, +) from gitdb.fun import delta_types from gitdb.exc import UnsupportedOperation from gitdb.util import to_bin_sha -from itertools import izip, chain +from gitdb.utils.compat import xrange + +try: + from itertools import izip +except ImportError: + izip = zip + from nose import SkipTest import os -import sys import tempfile @@ -39,15 +44,15 @@ def bin_sha_from_filename(filename): #} END utilities class TestPack(TestBase): - + packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67) packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30) packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42) packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2]) packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2]) packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2]) - - + + def _assert_index_file(self, index, version, size): assert index.packfile_checksum() != index.indexfile_checksum() assert len(index.packfile_checksum()) == 20 @@ -55,93 +60,93 @@ class TestPack(TestBase): assert index.version() == version assert index.size() == size assert len(index.offsets()) == size - + # get all data of all objects for oidx in xrange(index.size()): sha = index.sha(oidx) assert oidx == index.sha_to_index(sha) - + entry = index.entry(oidx) assert len(entry) == 3 - + assert entry[0] == index.offset(oidx) assert entry[1] == sha assert entry[2] == index.crc(oidx) - + # verify partial sha for l 
in (4,8,11,17,20): assert index.partial_sha_to_index(sha[:l], l*2) == oidx - + # END for each object index in indexfile self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2) - - + + def _assert_pack_file(self, pack, version, size): assert pack.version() == 2 assert pack.size() == size assert len(pack.checksum()) == 20 - + num_obj = 0 for obj in pack.stream_iter(): num_obj += 1 info = pack.info(obj.pack_offset) stream = pack.stream(obj.pack_offset) - + assert info.pack_offset == stream.pack_offset assert info.type_id == stream.type_id assert hasattr(stream, 'read') - + # it should be possible to read from both streams assert obj.read() == stream.read() - + streams = pack.collect_streams(obj.pack_offset) assert streams - + # read the stream try: dstream = DeltaApplyReader.new(streams) except ValueError: - # ignore these, old git versions use only ref deltas, + # ignore these, old git versions use only ref deltas, # which we havent resolved ( as we are without an index ) # Also ignore non-delta streams continue # END get deltastream - + # read all data = dstream.read() assert len(data) == dstream.size - + # test seek dstream.seek(0) assert dstream.read() == data - - + + # read chunks # NOTE: the current implementation is safe, it basically transfers # all calls to the underlying memory map - + # END for each object assert num_obj == size - - + + def test_pack_index(self): # check version 1 and 2 - for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2): + for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2): index = PackIndexFile(indexfile) self._assert_index_file(index, version, size) # END run tests - + def test_pack(self): - # there is this special version 3, but apparently its like 2 ... + # there is this special version 3, but apparently its like 2 ... 
for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2): pack = PackFile(packfile) self._assert_pack_file(pack, version, size) # END for each pack to test - + @with_rw_directory def test_pack_entity(self, rw_dir): pack_objs = list() - for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1), + for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1), (self.packfile_v2_2, self.packindexfile_v2), (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)): packfile, version, size = packinfo @@ -150,7 +155,7 @@ class TestPack(TestBase): assert entity.pack().path() == packfile assert entity.index().path() == indexfile pack_objs.extend(entity.stream_iter()) - + count = 0 for info, stream in izip(entity.info_iter(), entity.stream_iter()): count += 1 @@ -158,10 +163,10 @@ class TestPack(TestBase): assert len(info.binsha) == 20 assert info.type_id == stream.type_id assert info.size == stream.size - + # we return fully resolved items, which is implied by the sha centric access assert not info.type_id in delta_types - + # try all calls assert len(entity.collect_streams(info.binsha)) oinfo = entity.info(info.binsha) @@ -170,7 +175,7 @@ class TestPack(TestBase): ostream = entity.stream(info.binsha) assert isinstance(ostream, OStream) assert ostream.binsha is not None - + # verify the stream try: assert entity.is_valid_stream(info.binsha, use_crc=True) @@ -180,16 +185,16 @@ class TestPack(TestBase): assert entity.is_valid_stream(info.binsha, use_crc=False) # END for each info, stream tuple assert count == size - + # END for each entity - + # pack writing - write all packs into one # index path can be None pack_path = tempfile.mktemp('', "pack", rw_dir) index_path = tempfile.mktemp('', 'index', rw_dir) iteration = 0 def rewind_streams(): - for obj in pack_objs: + for obj in pack_objs: obj.stream.seek(0) #END utility for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), 
None)): @@ -199,23 +204,23 @@ class TestPack(TestBase): ifile = open(ipath, 'wb') iwrite = ifile.write #END handle ip - + # make sure we rewind the streams ... we work on the same objects over and over again - if iteration > 0: + if iteration > 0: rewind_streams() #END rewind streams iteration += 1 - + pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj) pfile.close() assert os.path.getsize(ppath) > 100 - + # verify pack pf = PackFile(ppath) assert pf.size() == len(pack_objs) assert pf.version() == PackFile.pack_version_default assert pf.checksum() == pack_sha - + # verify index if ipath is not None: ifile.close() @@ -227,7 +232,7 @@ class TestPack(TestBase): assert idx.size() == len(pack_objs) #END verify files exist #END for each packpath, indexpath pair - + # verify the packs throughly rewind_streams() entity = PackEntity.create(pack_objs, rw_dir) @@ -239,9 +244,9 @@ class TestPack(TestBase): # END for each crc mode #END for each info assert count == len(pack_objs) - - + + def test_pack_64(self): # TODO: hex-edit a pack helping us to verify that we can handle 64 byte offsets - # of course without really needing such a huge pack + # of course without really needing such a huge pack raise SkipTest() diff --git a/gitdb/test/test_stream.py b/gitdb/test/test_stream.py index 6dc2746..50db44b 100644 --- a/gitdb/test/test_stream.py +++ b/gitdb/test/test_stream.py @@ -3,49 +3,47 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Test for object db""" -from lib import ( - TestBase, - DummyStream, - Sha1Writer, - make_bytes, - make_object, - fixture_path - ) - -from gitdb import * -from gitdb.util import ( - NULL_HEX_SHA, - hex_to_bin - ) - -from gitdb.util import zlib + +from gitdb.test.lib import ( + TestBase, + DummyStream, + make_bytes, + make_object, + fixture_path +) + +from gitdb import ( + DecompressMemMapReader, + FDCompressedSha1Writer, + 
LooseObjectDB, + Sha1Writer +) +from gitdb.util import hex_to_bin + +import zlib from gitdb.typ import ( str_blob_type - ) +) -import time import tempfile import os - - - class TestStream(TestBase): """Test stream classes""" - + data_sizes = (15, 10000, 1000*1024+512) - + def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None): - """Make stream tests - the orig_stream is seekable, allowing it to be + """Make stream tests - the orig_stream is seekable, allowing it to be rewound and reused :param cdata: the data we expect to read from stream, the contents :param rewind_stream: function called to rewind the stream to make it ready for reuse""" ns = 10 assert len(cdata) > ns-1, "Data must be larger than %i, was %i" % (ns, len(cdata)) - + # read in small steps - ss = len(cdata) / ns + ss = len(cdata) // ns for i in range(ns): data = stream.read(ss) chunk = cdata[i*ss:(i+1)*ss] @@ -55,38 +53,38 @@ class TestStream(TestBase): if rest: assert rest == cdata[-len(rest):] # END handle rest - + if isinstance(stream, DecompressMemMapReader): assert len(stream.data()) == stream.compressed_bytes_read() # END handle special type - + rewind_stream(stream) - + # read everything rdata = stream.read() assert rdata == cdata - + if isinstance(stream, DecompressMemMapReader): assert len(stream.data()) == stream.compressed_bytes_read() # END handle special type - + def test_decompress_reader(self): for close_on_deletion in range(2): for with_size in range(2): for ds in self.data_sizes: cdata = make_bytes(ds, randomize=False) - + # zdata = zipped actual data # cdata = original content data - + # create reader if with_size: # need object data zdata = zlib.compress(make_object(str_blob_type, cdata)) - type, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion) + typ, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion) assert size == len(cdata) - assert type == str_blob_type - + assert typ == str_blob_type + # even if we don't set the size, 
it will be set automatically on first read test_reader = DecompressMemMapReader(zdata, close_on_deletion=False) assert test_reader._s == len(cdata) @@ -95,60 +93,59 @@ class TestStream(TestBase): zdata = zlib.compress(cdata) reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata)) assert reader._s == len(cdata) - # END get reader - + # END get reader + self._assert_stream_reader(reader, cdata, lambda r: r.seek(0)) - + # put in a dummy stream for closing dummy = DummyStream() reader._m = dummy - + assert not dummy.closed del(reader) assert dummy.closed == close_on_deletion # END for each datasize # END whether size should be used # END whether stream should be closed when deleted - + def test_sha_writer(self): writer = Sha1Writer() - assert 2 == writer.write("hi") + assert 2 == writer.write("hi".encode("ascii")) assert len(writer.sha(as_hex=1)) == 40 assert len(writer.sha(as_hex=0)) == 20 - + # make sure it does something ;) prev_sha = writer.sha() - writer.write("hi again") + writer.write("hi again".encode("ascii")) assert writer.sha() != prev_sha - + def test_compressed_writer(self): for ds in self.data_sizes: fd, path = tempfile.mkstemp() ostream = FDCompressedSha1Writer(fd) data = make_bytes(ds, randomize=False) - + # for now, just a single write, code doesn't care about chunking assert len(data) == ostream.write(data) ostream.close() - + # its closed already self.failUnlessRaises(OSError, os.close, fd) - + # read everything back, compare to data we zip fd = os.open(path, os.O_RDONLY|getattr(os, 'O_BINARY', 0)) written_data = os.read(fd, os.path.getsize(path)) assert len(written_data) == os.path.getsize(path) os.close(fd) assert written_data == zlib.compress(data, 1) # best speed - + os.remove(path) # END for each os - + def test_decompress_reader_special_case(self): odb = LooseObjectDB(fixture_path('objects')) ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b')) - + # if there is a bug, we will be missing one byte exactly ! 
data = ostream.read() assert len(data) == ostream.size - diff --git a/gitdb/test/test_util.py b/gitdb/test/test_util.py index 35f9f44..e79355a 100644 --- a/gitdb/test/test_util.py +++ b/gitdb/test/test_util.py @@ -6,74 +6,74 @@ import tempfile import os -from lib import TestBase +from gitdb.test.lib import TestBase from gitdb.util import ( - to_hex_sha, - to_bin_sha, - NULL_HEX_SHA, + to_hex_sha, + to_bin_sha, + NULL_HEX_SHA, LockedFD - ) +) + - class TestUtils(TestBase): def test_basics(self): assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA assert len(to_bin_sha(NULL_HEX_SHA)) == 20 - assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA - + assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA.encode("ascii") + def _cmp_contents(self, file_path, data): - # raise if data from file at file_path + # raise if data from file at file_path # does not match data string fp = open(file_path, "rb") try: - assert fp.read() == data + assert fp.read() == data.encode("ascii") finally: fp.close() - + def test_lockedfd(self): my_file = tempfile.mktemp() orig_data = "hello" new_data = "world" my_file_fp = open(my_file, "wb") - my_file_fp.write(orig_data) + my_file_fp.write(orig_data.encode("ascii")) my_file_fp.close() - + try: lfd = LockedFD(my_file) - lockfilepath = lfd._lockfilepath() - + lockfilepath = lfd._lockfilepath() + # cannot end before it was started self.failUnlessRaises(AssertionError, lfd.rollback) self.failUnlessRaises(AssertionError, lfd.commit) - + # open for writing assert not os.path.isfile(lockfilepath) wfd = lfd.open(write=True) assert lfd._fd is wfd assert os.path.isfile(lockfilepath) - + # write data and fail - os.write(wfd, new_data) + os.write(wfd, new_data.encode("ascii")) lfd.rollback() assert lfd._fd is None self._cmp_contents(my_file, orig_data) assert not os.path.isfile(lockfilepath) - + # additional call doesnt fail lfd.commit() lfd.rollback() - + # test reading lfd = LockedFD(my_file) rfd = lfd.open(write=False) - assert os.read(rfd, 
len(orig_data)) == orig_data - + assert os.read(rfd, len(orig_data)) == orig_data.encode("ascii") + assert os.path.isfile(lockfilepath) # deletion rolls back del(lfd) assert not os.path.isfile(lockfilepath) - - + + # write data - concurrently lfd = LockedFD(my_file) olfd = LockedFD(my_file) @@ -82,17 +82,17 @@ class TestUtils(TestBase): assert os.path.isfile(lockfilepath) # another one fails self.failUnlessRaises(IOError, olfd.open) - - wfdstream.write(new_data) + + wfdstream.write(new_data.encode("ascii")) lfd.commit() assert not os.path.isfile(lockfilepath) self._cmp_contents(my_file, new_data) - + # could test automatic _end_writing on destruction finally: os.remove(my_file) # END final cleanup - + # try non-existing file for reading lfd = LockedFD(tempfile.mktemp()) try: @@ -102,4 +102,3 @@ class TestUtils(TestBase): else: self.fail("expected OSError") # END handle exceptions - |
