diff options
| author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-04 11:21:36 +0100 |
|---|---|---|
| committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-04 11:21:36 +0100 |
| commit | ff7615321ee31d981a171f7677a56a971c554059 (patch) | |
| tree | c7057424c95ed246242bb4589798c2c01e8a5648 | |
| parent | 8b4939630a0d7362e5a6fbca052922d710a87c7e (diff) | |
| download | gitdb-ff7615321ee31d981a171f7677a56a971c554059.tar.gz | |
Applied autopep8
autopep8 -v -j 8 --max-line-length 120 --in-place --recursive
36 files changed, 459 insertions, 371 deletions
diff --git a/doc/source/conf.py b/doc/source/conf.py index 723a345..68d9a3f 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -11,7 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import sys +import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -171,8 +172,8 @@ htmlhelp_basename = 'GitDBdoc' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'GitDB.tex', u'GitDB Documentation', - u'Sebastian Thiel', 'manual'), + ('index', 'GitDB.tex', u'GitDB Documentation', + u'Sebastian Thiel', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/gitdb/__init__.py b/gitdb/__init__.py index 2a68940..791a2ef 100644 --- a/gitdb/__init__.py +++ b/gitdb/__init__.py @@ -8,6 +8,8 @@ import sys import os #{ Initialization + + def _init_externals(): """Initialize external projects by putting them into the path""" for module in ('smmap',): @@ -17,8 +19,8 @@ def _init_externals(): __import__(module) except ImportError: raise ImportError("'%s' could not be imported, assure it is located in your PYTHONPATH" % module) - #END verify import - #END handel imports + # END verify import + # END handel imports #} END initialization diff --git a/gitdb/base.py b/gitdb/base.py index a33fb67..5760b8a 100644 --- a/gitdb/base.py +++ b/gitdb/base.py @@ -11,12 +11,14 @@ from gitdb.fun import ( ) __all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo', - 'OStream', 'OPackStream', 'ODeltaPackStream', - 'IStream', 'InvalidOInfo', 'InvalidOStream' ) + 'OStream', 'OPackStream', 'ODeltaPackStream', + 'IStream', 'InvalidOInfo', 'InvalidOStream') #{ ODB Bases + class OInfo(tuple): + """Carries information about an object in an ODB, provding information about the binary sha of the object, the type_string as well as the uncompressed size in bytes. @@ -62,6 +64,7 @@ class OInfo(tuple): class OPackInfo(tuple): + """As OInfo, but provides a type_id property to retrieve the numerical type id, and does not include a sha. @@ -71,7 +74,7 @@ class OPackInfo(tuple): __slots__ = tuple() def __new__(cls, packoffset, type, size): - return tuple.__new__(cls, (packoffset,type, size)) + return tuple.__new__(cls, (packoffset, type, size)) def __init__(self, *args): tuple.__init__(self) @@ -98,6 +101,7 @@ class OPackInfo(tuple): class ODeltaPackInfo(OPackInfo): + """Adds delta specific information, Either the 20 byte sha which points to some object in the database, or the negative offset from the pack_offset, so that pack_offset - delta_info yields @@ -115,6 +119,7 @@ class ODeltaPackInfo(OPackInfo): class OStream(OInfo): + """Base for object streams retrieved from the database, providing additional information about the stream. Generally, ODB streams are read-only as objects are immutable""" @@ -124,7 +129,6 @@ class OStream(OInfo): """Helps with the initialization of subclasses""" return tuple.__new__(cls, (sha, type, size, stream)) - def __init__(self, *args, **kwargs): tuple.__init__(self) @@ -141,6 +145,7 @@ class OStream(OInfo): class ODeltaStream(OStream): + """Uses size info of its stream, delaying reads""" def __new__(cls, sha, type, size, stream, *args, **kwargs): @@ -157,6 +162,7 @@ class ODeltaStream(OStream): class OPackStream(OPackInfo): + """Next to pack object information, a stream outputting an undeltified base object is provided""" __slots__ = tuple() @@ -176,13 +182,13 @@ class OPackStream(OPackInfo): class ODeltaPackStream(ODeltaPackInfo): + """Provides a stream outputting the uncompressed offset delta information""" __slots__ = tuple() def __new__(cls, packoffset, type, size, delta_info, stream): return tuple.__new__(cls, (packoffset, type, size, delta_info, stream)) - #{ Stream Reader Interface def read(self, size=-1): return self[4].read(size) @@ -194,6 +200,7 @@ class ODeltaPackStream(ODeltaPackInfo): class IStream(list): + """Represents an input content stream to be fed into the ODB. It is mutable to allow the ODB to record information about the operations outcome right in this instance. @@ -246,7 +253,6 @@ class IStream(list): binsha = property(_binsha, _set_binsha) - def _type(self): return self[1] @@ -275,6 +281,7 @@ class IStream(list): class InvalidOInfo(tuple): + """Carries information about a sha identifying an object which is invalid in the queried database. The exception attribute provides more information about the cause of the issue""" @@ -301,6 +308,7 @@ class InvalidOInfo(tuple): class InvalidOStream(InvalidOInfo): + """Carries information about an invalid ODB stream""" __slots__ = tuple() diff --git a/gitdb/db/base.py b/gitdb/db/base.py index a670eea..2615b13 100644 --- a/gitdb/db/base.py +++ b/gitdb/db/base.py @@ -19,11 +19,11 @@ from itertools import chain from functools import reduce - __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB') class ObjectDBR(object): + """Defines an interface for object database lookup. Objects are identified either by their 20 byte bin sha""" @@ -61,6 +61,7 @@ class ObjectDBR(object): class ObjectDBW(object): + """Defines an interface to create objects in the database""" def __init__(self, *args, **kwargs): @@ -100,6 +101,7 @@ class ObjectDBW(object): class FileDBBase(object): + """Provides basic facilities to retrieve files of interest, including caching facilities to help mapping hexsha's to objects""" @@ -113,7 +115,6 @@ class FileDBBase(object): super(FileDBBase, self).__init__() self._root_path = root_path - #{ Interface def root_path(self): """:return: path at which this db operates""" @@ -128,6 +129,7 @@ class FileDBBase(object): class CachingDB(object): + """A database which uses caches to speed-up access""" #{ Interface @@ -143,8 +145,6 @@ class CachingDB(object): # END interface - - def _databases_recursive(database, output): """Fill output list with database from db, in order. Deals with Loose, Packed and compound databases.""" @@ -159,10 +159,12 @@ def _databases_recursive(database, output): class CompoundDB(ObjectDBR, LazyMixin, CachingDB): + """A database which delegates calls to sub-databases. Databases are stored in the lazy-loaded _dbs attribute. Define _set_cache_ to update it with your databases""" + def _set_cache_(self, attr): if attr == '_dbs': self._dbs = list() @@ -207,7 +209,7 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB): def size(self): """:return: total size of all contained databases""" - return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0) + return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0) def sha_iter(self): return chain(*(db.sha_iter() for db in self._dbs)) diff --git a/gitdb/db/git.py b/gitdb/db/git.py index d22e3f1..a4f6f54 100644 --- a/gitdb/db/git.py +++ b/gitdb/db/git.py @@ -20,6 +20,7 @@ __all__ = ('GitDB', ) class GitDB(FileDBBase, ObjectDBW, CompoundDB): + """A git-style object database, which contains all objects in the 'objects' subdirectory""" # Configuration @@ -41,8 +42,8 @@ class GitDB(FileDBBase, ObjectDBW, CompoundDB): self._dbs = list() loose_db = None for subpath, dbcls in ((self.packs_dir, self.PackDBCls), - (self.loose_dir, self.LooseDBCls), - (self.alternates_dir, self.ReferenceDBCls)): + (self.loose_dir, self.LooseDBCls), + (self.alternates_dir, self.ReferenceDBCls)): path = self.db_path(subpath) if os.path.exists(path): self._dbs.append(dbcls(path)) diff --git a/gitdb/db/loose.py b/gitdb/db/loose.py index 3743026..e924080 100644 --- a/gitdb/db/loose.py +++ b/gitdb/db/loose.py @@ -57,10 +57,11 @@ import tempfile import os -__all__ = ( 'LooseObjectDB', ) +__all__ = ('LooseObjectDB', ) class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): + """A database which operates on loose object files""" # CONFIGURATION @@ -73,7 +74,6 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): if os.name == 'nt': new_objects_mode = int("644", 8) - def __init__(self, root_path): super(LooseObjectDB, self).__init__(root_path) self._hexsha_to_file = dict() @@ -164,7 +164,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): def stream(self, sha): m = self._map_loose_object(sha) - type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True) return OStream(sha, type, size, stream) def has_object(self, sha): @@ -199,7 +199,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): else: # write object with header, we have to make a new one write_object(istream.type, istream.size, istream.read, writer.write, - chunk_size=self.stream_chunk_size) + chunk_size=self.stream_chunk_size) # END handle direct stream copies finally: if tmp_path: diff --git a/gitdb/db/mem.py b/gitdb/db/mem.py index 1aa0d51..595dbf4 100644 --- a/gitdb/db/mem.py +++ b/gitdb/db/mem.py @@ -28,7 +28,9 @@ from io import BytesIO __all__ = ("MemoryDB", ) + class MemoryDB(ObjectDBR, ObjectDBW): + """A memory database stores everything to memory, providing fast IO and object retrieval. It should be used to buffer results and obtain SHAs before writing it to the actual physical storage, as it allows to query whether object already @@ -85,7 +87,6 @@ class MemoryDB(ObjectDBR, ObjectDBW): except AttributeError: return self._cache.keys() - #{ Interface def stream_copy(self, sha_iter, odb): """Copy the streams as identified by sha's yielded by sha_iter into the given odb diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py index eaf431a..6b03d83 100644 --- a/gitdb/db/pack.py +++ b/gitdb/db/pack.py @@ -31,6 +31,7 @@ __all__ = ('PackedDB', ) class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): + """A database operating on a set of object packs""" # sort the priority list every N queries @@ -113,7 +114,7 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): def size(self): sizes = [item[1].index().size() for item in self._entities] - return reduce(lambda x,y: x+y, sizes, 0) + return reduce(lambda x, y: x + y, sizes, 0) #} END object db read @@ -127,7 +128,6 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): #} END object db write - #{ Interface def update_cache(self, force=False): @@ -177,7 +177,7 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): def entities(self): """:return: list of pack entities operated upon by this database""" - return [ item[1] for item in self._entities ] + return [item[1] for item in self._entities] def partial_to_complete_sha(self, partial_binsha, canonical_length): """:return: 20 byte sha as inferred by the given partial binary sha diff --git a/gitdb/db/ref.py b/gitdb/db/ref.py index d989126..83a9f61 100644 --- a/gitdb/db/ref.py +++ b/gitdb/db/ref.py @@ -8,7 +8,9 @@ from gitdb.db.base import ( __all__ = ('ReferenceDB', ) + class ReferenceDB(CompoundDB): + """A database consisting of database referred to in a file""" # Configuration diff --git a/gitdb/exc.py b/gitdb/exc.py index 73f84d2..d58442f 100644 --- a/gitdb/exc.py +++ b/gitdb/exc.py @@ -5,28 +5,42 @@ """Module with common exceptions""" from gitdb.util import to_hex_sha + class ODBError(Exception): + """All errors thrown by the object database""" + class InvalidDBRoot(ODBError): + """Thrown if an object database cannot be initialized at the given path""" + class BadObject(ODBError): + """The object with the given SHA does not exist. Instantiate with the failed sha""" def __str__(self): return "BadObject: %s" % to_hex_sha(self.args[0]) + class ParseError(ODBError): + """Thrown if the parsing of a file failed due to an invalid format""" + class AmbiguousObjectName(ODBError): + """Thrown if a possibly shortened name does not uniquely represent a single object in the database""" + class BadObjectType(ODBError): + """The object had an unsupported type""" + class UnsupportedOperation(ODBError): + """Thrown if the given operation cannot be supported by the object database""" diff --git a/gitdb/ext/smmap b/gitdb/ext/smmap -Subproject eb40b44ce4a6e646aabf7b7091d876738336c42 +Subproject 84929ed811142e366d6c5916125302c1419acad diff --git a/gitdb/fun.py b/gitdb/fun.py index b7662b4..17da4e5 100644 --- a/gitdb/fun.py +++ b/gitdb/fun.py @@ -31,15 +31,15 @@ OFS_DELTA = 6 REF_DELTA = 7 delta_types = (OFS_DELTA, REF_DELTA) -type_id_to_type_map = { - 0 : b'', # EXT 1 - 1 : str_commit_type, - 2 : str_tree_type, - 3 : str_blob_type, - 4 : str_tag_type, - 5 : b'', # EXT 2 - OFS_DELTA : "OFS_DELTA", # OFFSET DELTA - REF_DELTA : "REF_DELTA" # REFERENCE DELTA +type_id_to_type_map = { + 0: b'', # EXT 1 + 1: str_commit_type, + 2: str_tree_type, + 3: str_blob_type, + 4: str_tag_type, + 5: b'', # EXT 2 + OFS_DELTA: "OFS_DELTA", # OFFSET DELTA + REF_DELTA: "REF_DELTA" # REFERENCE DELTA } type_to_type_id_map = { @@ -55,8 +55,8 @@ type_to_type_id_map = { chunk_size = 1000 * mmap.PAGESIZE __all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info', - 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data', - 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header') + 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data', + 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header') #{ Structures @@ -72,6 +72,7 @@ def _set_delta_rbound(d, size): # MUST NOT DO THIS HERE return d + def _move_delta_lbound(d, bytes): """Move the delta by the given amount of bytes, reducing its size so that its right bound stays static @@ -89,9 +90,11 @@ def _move_delta_lbound(d, bytes): return d + def delta_duplicate(src): return DeltaChunk(src.to, src.ts, src.so, src.data) + def delta_chunk_apply(dc, bbuf, write): """Apply own data to the target buffer :param bbuf: buffer providing source bytes for copy operations @@ -112,15 +115,16 @@ def delta_chunk_apply(dc, bbuf, write): class DeltaChunk(object): + """Represents a piece of a delta, it can either add new data, or copy existing one from a source buffer""" __slots__ = ( - 'to', # start offset in the target buffer in bytes + 'to', # start offset in the target buffer in bytes 'ts', # size of this chunk in the target buffer in bytes 'so', # start offset in the source buffer in bytes or None 'data', # chunk of bytes to be added to the target buffer, # DeltaChunkList to use as base, or None - ) + ) def __init__(self, to, ts, so, data): self.to = to @@ -142,6 +146,7 @@ class DeltaChunk(object): #} END interface + def _closest_index(dcl, absofs): """:return: index at which the given absofs should be inserted. The index points to the DeltaChunk with a target buffer absofs that equals or is greater than @@ -160,7 +165,8 @@ def _closest_index(dcl, absofs): lo = mid + 1 # END handle bound # END for each delta absofs - return len(dcl)-1 + return len(dcl) - 1 + def delta_list_apply(dcl, bbuf, write): """Apply the chain's changes and write the final result using the passed @@ -173,6 +179,7 @@ def delta_list_apply(dcl, bbuf, write): delta_chunk_apply(dc, bbuf, write) # END for each dc + def delta_list_slice(dcl, absofs, size, ndcl): """:return: Subsection of this list at the given absolute offset, with the given size in bytes. @@ -209,6 +216,7 @@ def delta_list_slice(dcl, absofs, size, ndcl): class DeltaChunkList(list): + """List with special functionality to deal with DeltaChunks. There are two types of lists we represent. The one was created bottom-up, working towards the latest delta, the other kind was created top-down, working from the @@ -252,16 +260,16 @@ class DeltaChunkList(list): dc = self[i] i += 1 if dc.data is None: - if first_data_index is not None and i-2-first_data_index > 1: - #if first_data_index is not None: + if first_data_index is not None and i - 2 - first_data_index > 1: + # if first_data_index is not None: nd = StringIO() # new data so = self[first_data_index].to # start offset in target buffer - for x in xrange(first_data_index, i-1): + for x in xrange(first_data_index, i - 1): xdc = self[x] nd.write(xdc.data[:xdc.ts]) # END collect data - del(self[first_data_index:i-1]) + del(self[first_data_index:i - 1]) buf = nd.getvalue() self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf)) @@ -274,10 +282,10 @@ class DeltaChunkList(list): # END skip non-data chunks if first_data_index is None: - first_data_index = i-1 + first_data_index = i - 1 # END iterate list - #if slen_orig != len(self): + # if slen_orig != len(self): # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100) return self @@ -288,7 +296,7 @@ class DeltaChunkList(list): :raise AssertionError: if the size doen't match""" if target_size > -1: assert self[-1].rbound() == target_size - assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size + assert reduce(lambda x, y: x + y, (d.ts for d in self), 0) == target_size # END target size verification if len(self) < 2: @@ -301,18 +309,19 @@ class DeltaChunkList(list): assert len(dc.data) >= dc.ts # END for each dc - left = islice(self, 0, len(self)-1) + left = islice(self, 0, len(self) - 1) right = iter(self) right.next() # this is very pythonic - we might have just use index based access here, # but this could actually be faster - for lft,rgt in izip(left, right): + for lft, rgt in izip(left, right): assert lft.rbound() == rgt.to assert lft.to + lft.ts == rgt.to # END for each pair class TopdownDeltaChunkList(DeltaChunkList): + """Represents a list which is generated by feeding its ancestor streams one by one""" __slots__ = tuple() @@ -356,19 +365,19 @@ class TopdownDeltaChunkList(DeltaChunkList): # END update target bounds if len(ccl) == 1: - self[dci-1] = ccl[0] + self[dci - 1] = ccl[0] else: # maybe try to compute the expenses here, and pick the right algorithm # It would normally be faster than copying everything physically though # TODO: Use a deque here, and decide by the index whether to extend # or extend left ! post_dci = self[dci:] - del(self[dci-1:]) # include deletion of dc + del(self[dci - 1:]) # include deletion of dc self.extend(ccl) self.extend(post_dci) slen = len(self) - dci += len(ccl)-1 # deleted dc, added rest + dci += len(ccl) - 1 # deleted dc, added rest # END handle chunk replacement # END for each chunk @@ -391,6 +400,7 @@ def is_loose_object(m): word = (b0 << 8) + b1 return b0 == 0x78 and (word % 31) == 0 + def loose_object_header_info(m): """ :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the @@ -402,6 +412,7 @@ def loose_object_header_info(m): return type_name, int(size) + def pack_object_header_info(data): """ :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset) @@ -430,6 +441,7 @@ def pack_object_header_info(data): # end performance at expense of maintenance ... return (type_id, size, i) + def create_pack_object_header(obj_type, obj_size): """ :return: string defining the pack header comprised of the object type @@ -439,7 +451,7 @@ def create_pack_object_header(obj_type, obj_size): :param obj_size: uncompressed size in bytes of the following object stream""" c = 0 # 1 byte if PY3: - hdr = bytearray() # output string + hdr = bytearray() # output string c = (obj_type << 4) | (obj_size & 0xf) obj_size >>= 4 @@ -447,10 +459,10 @@ def create_pack_object_header(obj_type, obj_size): hdr.append(c | 0x80) c = obj_size & 0x7f obj_size >>= 7 - #END until size is consumed + # END until size is consumed hdr.append(c) else: - hdr = bytes() # output string + hdr = bytes() # output string c = (obj_type << 4) | (obj_size & 0xf) obj_size >>= 4 @@ -458,11 +470,12 @@ def create_pack_object_header(obj_type, obj_size): hdr += chr(c | 0x80) c = obj_size & 0x7f obj_size >>= 7 - #END until size is consumed + # END until size is consumed hdr += chr(c) # end handle interpreter return hdr + def msb_size(data, offset=0): """ :return: tuple(read_bytes, size) read the msb size from the given random @@ -473,8 +486,8 @@ def msb_size(data, offset=0): hit_msb = False if PY3: while i < l: - c = data[i+offset] - size |= (c & 0x7f) << i*7 + c = data[i + offset] + size |= (c & 0x7f) << i * 7 i += 1 if not c & 0x80: hit_msb = True @@ -483,8 +496,8 @@ def msb_size(data, offset=0): # END while in range else: while i < l: - c = ord(data[i+offset]) - size |= (c & 0x7f) << i*7 + c = ord(data[i + offset]) + size |= (c & 0x7f) << i * 7 i += 1 if not c & 0x80: hit_msb = True @@ -494,7 +507,8 @@ def msb_size(data, offset=0): # end performance ... if not hit_msb: raise AssertionError("Could not find terminating MSB byte in data stream") - return i+offset, size + return i + offset, size + def loose_object_header(type, size): """ @@ -502,6 +516,7 @@ def loose_object_header(type, size): followed by the content stream of size 'size'""" return ('%s %i\0' % (force_text(type), size)).encode('ascii') + def write_object(type, size, read, write, chunk_size=chunk_size): """ Write the object as identified by type, size and source_stream into the @@ -522,6 +537,7 @@ def write_object(type, size, read, write, chunk_size=chunk_size): return tbw + def stream_copy(read, write, size, chunk_size): """ Copy a stream up to size bytes using the provided read and write methods, @@ -532,7 +548,7 @@ def stream_copy(read, write, size, chunk_size): # WRITE ALL DATA UP TO SIZE while True: - cs = min(chunk_size, size-dbw) + cs = min(chunk_size, size - dbw) # NOTE: not all write methods return the amount of written bytes, like # mmap.write. Its bad, but we just deal with it ... perhaps its not # even less efficient @@ -548,6 +564,7 @@ def stream_copy(read, write, size, chunk_size): # END duplicate data return dbw + def connect_deltas(dstreams): """ Read the condensed delta chunk information from dstream and merge its information @@ -602,7 +619,7 @@ def connect_deltas(dstreams): rbound = cp_off + cp_size if (rbound < cp_size or - rbound > base_size): + rbound > base_size): break dcl.append(DeltaChunk(tbw, cp_size, cp_off, None)) @@ -610,7 +627,7 @@ def connect_deltas(dstreams): elif c: # NOTE: in C, the data chunks should probably be concatenated here. # In python, we do it as a post-process - dcl.append(DeltaChunk(tbw, c, 0, db[i:i+c])) + dcl.append(DeltaChunk(tbw, c, 0, db[i:i + c])) i += c tbw += c else: @@ -632,6 +649,7 @@ def connect_deltas(dstreams): return tdcl + def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write): """ Apply data from a delta buffer using a source buffer to the target file @@ -678,11 +696,11 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write): rbound = cp_off + cp_size if (rbound < cp_size or - rbound > src_buf_size): + rbound > src_buf_size): break write(buffer(src_buf, cp_off, cp_size)) elif c: - write(db[i:i+c]) + write(db[i:i + c]) i += c else: raise ValueError("unexpected delta opcode 0") @@ -721,11 +739,11 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write): rbound = cp_off + cp_size if (rbound < cp_size or - rbound > src_buf_size): + rbound > src_buf_size): break write(buffer(src_buf, cp_off, cp_size)) elif c: - write(db[i:i+c]) + write(db[i:i + c]) i += c else: raise ValueError("unexpected delta opcode 0") @@ -749,7 +767,7 @@ def is_equal_canonical_sha(canonical_length, match, sha1): return False if canonical_length - binary_length and \ - (byte_ord(match[-1]) ^ byte_ord(sha1[len(match)-1])) & 0xf0: + (byte_ord(match[-1]) ^ byte_ord(sha1[len(match) - 1])) & 0xf0: return False # END handle uneven canonnical length return True diff --git a/gitdb/pack.py b/gitdb/pack.py index 375cc59..b4ba787 100644 --- a/gitdb/pack.py +++ b/gitdb/pack.py @@ -72,8 +72,6 @@ import sys __all__ = ('PackIndexFile', 'PackFile', 'PackEntity') - - #{ Utilities def pack_object_at(cursor, offset, as_stream): @@ -107,7 +105,7 @@ def pack_object_at(cursor, offset, as_stream): total_rela_offset = i # REF DELTA elif type_id == REF_DELTA: - total_rela_offset = data_rela_offset+20 + total_rela_offset = data_rela_offset + 20 delta_info = data[data_rela_offset:total_rela_offset] # BASE OBJECT else: @@ -129,6 +127,7 @@ def pack_object_at(cursor, offset, as_stream): # END handle info # END handle stream + def write_stream_to_pack(read, write, zstream, base_crc=None): """Copy a stream as read from read function, zip it, and write the result. Count the number of written bytes and return it @@ -142,7 +141,7 @@ def write_stream_to_pack(read, write, zstream, base_crc=None): crc = 0 if want_crc: crc = base_crc - #END initialize crc + # END initialize crc while True: chunk = read(chunk_size) @@ -153,18 +152,18 @@ def write_stream_to_pack(read, write, zstream, base_crc=None): if want_crc: crc = crc32(compressed, crc) - #END handle crc + # END handle crc if len(chunk) != chunk_size: break - #END copy loop + # END copy loop compressed = zstream.flush() bw += len(compressed) write(compressed) if want_crc: crc = crc32(compressed, crc) - #END handle crc + # END handle crc return (br, bw, crc) @@ -173,6 +172,7 @@ def write_stream_to_pack(read, write, zstream, base_crc=None): class IndexWriter(object): + """Utility to cache index information, allowing to write all information later in one go to the given stream **Note:** currently only writes v2 indices""" @@ -198,15 +198,15 @@ class IndexWriter(object): sha_write(pack(">L", PackIndexFile.index_version_default)) # fanout - tmplist = list((0,)*256) # fanout or list with 64 bit offsets + tmplist = list((0,) * 256) # fanout or list with 64 bit offsets for t in self._objs: tmplist[byte_ord(t[0][0])] += 1 - #END prepare fanout + # END prepare fanout for i in xrange(255): v = tmplist[i] sha_write(pack('>L', v)) - tmplist[i+1] += v - #END write each fanout entry + tmplist[i + 1] += v + # END write each fanout entry sha_write(pack('>L', tmplist[255])) # sha1 ordered @@ -215,8 +215,8 @@ class IndexWriter(object): # crc32 for t in self._objs: - sha_write(pack('>L', t[1]&0xffffffff)) - #END for each crc + sha_write(pack('>L', t[1] & 0xffffffff)) + # END for each crc tmplist = list() # offset 32 @@ -224,15 +224,15 @@ class IndexWriter(object): ofs = t[2] if ofs > 0x7fffffff: tmplist.append(ofs) - ofs = 0x80000000 + len(tmplist)-1 - #END hande 64 bit offsets - sha_write(pack('>L', ofs&0xffffffff)) - #END for each offset + ofs = 0x80000000 + len(tmplist) - 1 + # END hande 64 bit offsets + sha_write(pack('>L', ofs & 0xffffffff)) + # END for each offset # offset 64 for ofs in tmplist: sha_write(pack(">Q", ofs)) - #END for each offset + # END for each offset # trailer assert(len(pack_sha) == 20) @@ -242,8 +242,8 @@ class IndexWriter(object): return sha - class PackIndexFile(LazyMixin): + """A pack index provides offsets into the corresponding pack, allowing to find locations for offsets faster.""" @@ -273,8 +273,9 @@ class PackIndexFile(LazyMixin): self._cursor = mman.make_cursor(self._indexpath).use_region() # We will assume that the index will always fully fit into memory ! if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size(): - raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (self._indexpath, self._cursor.file_size(), mman.window_size())) - #END assert window size + raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % ( + self._indexpath, self._cursor.file_size(), mman.window_size())) + # END assert window size else: # now its time to initialize everything - if we are here, someone wants # to access the fanout table or related properties @@ -293,27 +294,25 @@ class PackIndexFile(LazyMixin): setattr(self, fname, getattr(self, "_%s_v%i" % (fname, self._version))) # END for each function to initialize - # INITIALIZE DATA # byte offset is 8 if version is 2, 0 otherwise self._initialize() # END handle attributes - #{ Access V1 def _entry_v1(self, i): """:return: tuple(offset, binsha, 0)""" - return unpack_from(">L20s", self._cursor.map(), 1024 + i*24) + (0, ) + return unpack_from(">L20s", self._cursor.map(), 1024 + i * 24) + (0, ) def _offset_v1(self, i): """see ``_offset_v2``""" - return unpack_from(">L", self._cursor.map(), 1024 + i*24)[0] + return unpack_from(">L", self._cursor.map(), 1024 + i * 24)[0] def _sha_v1(self, i): """see ``_sha_v2``""" - base = 1024 + (i*24)+4 - return self._cursor.map()[base:base+20] + base = 1024 + (i * 24) + 4 + return self._cursor.map()[base:base + 20] def _crc_v1(self, i): """unsupported""" @@ -343,7 +342,7 @@ class PackIndexFile(LazyMixin): def _sha_v2(self, i): """:return: sha at the given index of this file index instance""" base = self._sha_list_offset + i * 20 - return self._cursor.map()[base:base+20] + return self._cursor.map()[base:base + 20] def _crc_v2(self, i): """:return: 4 bytes crc for the object at index i""" @@ -369,7 +368,7 @@ class PackIndexFile(LazyMixin): out = list() append = out.append for i in xrange(256): - append(unpack_from('>L', d, byte_offset + i*4)[0]) + append(unpack_from('>L', d, byte_offset + i * 4)[0]) # END for each entry return out @@ -421,7 +420,7 @@ class PackIndexFile(LazyMixin): get_sha = self.sha lo = 0 # lower index, the left bound of the bisection if first_byte != 0: - lo = self._fanout_table[first_byte-1] + lo = self._fanout_table[first_byte - 1] hi = self._fanout_table[first_byte] # the upper, right bound of the bisection # bisect until we have the sha @@ -455,7 +454,7 @@ class PackIndexFile(LazyMixin): get_sha = self.sha lo = 0 # lower index, the left bound of the bisection if first_byte != 0: - lo = self._fanout_table[first_byte-1] + lo = self._fanout_table[first_byte - 1] hi = self._fanout_table[first_byte] # the upper, right bound of the bisection # fill the partial to full 20 bytes @@ -481,7 +480,7 @@ class PackIndexFile(LazyMixin): if is_equal_canonical_sha(canonical_length, partial_bin_sha, cur_sha): next_sha = None if lo + 1 < self.size(): - next_sha = get_sha(lo+1) + next_sha = get_sha(lo + 1) if next_sha and next_sha == cur_sha: raise AmbiguousObjectName(partial_bin_sha) return lo @@ -500,6 +499,7 @@ class PackIndexFile(LazyMixin): class PackFile(LazyMixin): + """A pack is a file written according to the Version 2 for git packs As we currently use memory maps, it could be assumed that the maximum size of @@ -516,7 +516,7 @@ class PackFile(LazyMixin): pack_version_default = 2 # offset into our data at which the first object starts - first_object_offset = 3*4 # header bytes + first_object_offset = 3 * 4 # header bytes footer_size = 20 # final sha def __init__(self, packpath): @@ -549,7 +549,6 @@ class PackFile(LazyMixin): stream_copy(ostream.read, null.write, ostream.size, chunk_size) cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read() - # if a stream is requested, reset it beforehand # Otherwise return the Stream object directly, its derived from the # info object @@ -578,7 +577,7 @@ class PackFile(LazyMixin): def checksum(self): """:return: 20 byte sha1 hash on all object sha's contained in this file""" - return self._cursor.use_region(self._cursor.file_size()-20).buffer()[:] + return self._cursor.use_region(self._cursor.file_size() - 20).buffer()[:] def path(self): """:return: path to the packfile""" @@ -645,13 +644,14 @@ class PackFile(LazyMixin): class PackEntity(LazyMixin): + """Combines the PackIndexFile and the PackFile into one, allowing the actual objects to be resolved and iterated""" - __slots__ = ( '_index', # our index file - '_pack', # our pack file - '_offset_map' # on demand dict mapping one offset to the next consecutive one - ) + __slots__ = ('_index', # our index file + '_pack', # our pack file + '_offset_map' # on demand dict mapping one offset to the next consecutive one + ) IndexFileCls = PackIndexFile PackFileCls = PackFile @@ -673,7 +673,7 @@ class PackEntity(LazyMixin): offset_map = None if len(offsets_sorted) == 1: - offset_map = { offsets_sorted[0] : last_offset } + offset_map = {offsets_sorted[0]: last_offset} else: iter_offsets = iter(offsets_sorted) iter_offsets_plus_one = iter(offsets_sorted) @@ -895,10 +895,9 @@ class PackEntity(LazyMixin): :raise BadObject:""" return self.collect_streams_at_offset(self._index.offset(self._sha_to_index(sha))) - @classmethod def write_pack(cls, object_iter, pack_write, index_write=None, - object_count = None, zlib_compression = zlib.Z_BEST_SPEED): + object_count=None, zlib_compression=zlib.Z_BEST_SPEED): """ Create a new pack by putting all objects obtained by the object_iterator into a pack which is written using the pack_write method. @@ -923,9 +922,9 @@ class PackEntity(LazyMixin): if not object_count: if not isinstance(object_iter, (tuple, list)): objs = list(object_iter) - #END handle list type + # END handle list type object_count = len(objs) - #END handle object + # END handle object pack_writer = FlexibleSha1Writer(pack_write) pwrite = pack_writer.write @@ -939,7 +938,7 @@ class PackEntity(LazyMixin): if wants_index: index = IndexWriter() - #END handle index header + # END handle index header actual_count = 0 for obj in objs: @@ -952,30 +951,31 @@ class PackEntity(LazyMixin): crc = crc32(hdr) else: crc = None - #END handle crc + # END handle crc pwrite(hdr) # data stream zstream = zlib.compressobj(zlib_compression) ostream = obj.stream - br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc = crc) + br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc=crc) assert(br == obj.size) if wants_index: index.append(obj.binsha, crc, ofs) - #END handle index + # END handle index ofs += len(hdr) + bw if actual_count == object_count: break - #END abort once we are done - #END for each object + # END abort once we are done + # END for each object if actual_count != object_count: - raise ValueError("Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count)) - #END count assertion + raise ValueError( + "Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count)) + # END count assertion # write footer - pack_sha = pack_writer.sha(as_hex = False) + pack_sha = pack_writer.sha(as_hex=False) assert len(pack_sha) == 20 pack_write(pack_sha) ofs += len(pack_sha) # just for completeness ;) @@ -983,12 +983,12 @@ class PackEntity(LazyMixin): index_sha = None if wants_index: index_sha = index.write(pack_sha, index_write) - #END handle index + # END handle index return pack_sha, index_sha @classmethod - def create(cls, object_iter, base_dir, object_count = None, zlib_compression = zlib.Z_BEST_SPEED): + def create(cls, object_iter, base_dir, object_count=None, zlib_compression=zlib.Z_BEST_SPEED): """Create a new on-disk entity comprised of a properly named pack file and a properly named and corresponding index file. The pack contains all OStream objects contained in object iter. :param base_dir: directory which is to contain the files @@ -1012,5 +1012,4 @@ class PackEntity(LazyMixin): return cls(new_pack_path) - #} END interface diff --git a/gitdb/stream.py b/gitdb/stream.py index b0a8900..4478a0f 100644 --- a/gitdb/stream.py +++ b/gitdb/stream.py @@ -38,14 +38,15 @@ try: except ImportError: pass -__all__ = ( 'DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader', - 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', 'FDCompressedSha1Writer', - 'FDStream', 'NullStream') +__all__ = ('DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader', + 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', 'FDCompressedSha1Writer', + 'FDStream', 'NullStream') #{ RO Streams class DecompressMemMapReader(LazyMixin): + """Reads data in chunks from a memory map and decompresses it. The client sees only the uncompressed data, respective file-like read calls are handling on-demand buffered decompression accordingly @@ -63,9 +64,9 @@ class DecompressMemMapReader(LazyMixin): to better support streamed reading - it would only need to keep the mmap and decompress it into chunks, thats all ... """ __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close', - '_cbr', '_phi') + '_cbr', '_phi') - max_read_size = 512*1024 # currently unused + max_read_size = 512 * 1024 # currently unused def __init__(self, m, close_on_deletion, size=None): """Initialize with mmap for stream reading @@ -214,7 +215,6 @@ class DecompressMemMapReader(LazyMixin): return bytes() # END handle depletion - # deplete the buffer, then just continue using the decompress object # which has an own buffer. We just need this to transparently parse the # header from the zlib stream @@ -263,7 +263,6 @@ class DecompressMemMapReader(LazyMixin): self._cwe = cws + size # END handle tail - # if window is too small, make it larger so zip can decompress something if self._cwe - self._cws < 8: self._cwe = self._cws + 8 @@ -285,7 +284,7 @@ class DecompressMemMapReader(LazyMixin): unused_datalen = len(self._zip.unconsumed_tail) else: unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data) - # end handle very special case ... + # end handle very special case ... self._cbr += len(indata) - unused_datalen self._br += len(dcompdat) @@ -301,12 +300,13 @@ class DecompressMemMapReader(LazyMixin): # to read, if we are called by compressed_bytes_read - it manipulates # us to empty the stream if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s: - dcompdat += self.read(size-len(dcompdat)) + dcompdat += self.read(size - len(dcompdat)) # END handle special case return dcompdat class DeltaApplyReader(LazyMixin): + """A reader which dynamically applies pack deltas to a base object, keeping the memory demands to a minimum. @@ -332,15 +332,15 @@ class DeltaApplyReader(LazyMixin): * cmd == 0 - invalid operation ( or error in delta stream ) """ __slots__ = ( - "_bstream", # base stream to which to apply the deltas - "_dstreams", # tuple of delta stream readers - "_mm_target", # memory map of the delta-applied data - "_size", # actual number of bytes in _mm_target - "_br" # number of bytes read - ) + "_bstream", # base stream to which to apply the deltas + "_dstreams", # tuple of delta stream readers + "_mm_target", # memory map of the delta-applied data + "_size", # actual number of bytes in _mm_target + "_br" # number of bytes read + ) #{ Configuration - k_max_memory_move = 250*1000*1000 + k_max_memory_move = 250 * 1000 * 1000 #} END configuration def __init__(self, stream_list): @@ -414,7 +414,6 @@ class DeltaApplyReader(LazyMixin): base_size = target_size = max(base_size, max_target_size) # END adjust buffer sizes - # Allocate private memory map big enough to hold the first base buffer # We need random access to it bbuf = allocate_memory(base_size) @@ -440,11 +439,11 @@ class DeltaApplyReader(LazyMixin): ddata = allocate_memory(dstream.size - offset) ddata.write(dbuf) # read the rest from the stream. The size we give is larger than necessary - stream_copy(dstream.read, ddata.write, dstream.size, 256*mmap.PAGESIZE) + stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE) ####################################################################### if 'c_apply_delta' in globals(): - c_apply_delta(bbuf, ddata, tbuf); + c_apply_delta(bbuf, ddata, tbuf) else: apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write) ####################################################################### @@ -463,7 +462,6 @@ class DeltaApplyReader(LazyMixin): self._mm_target = bbuf self._size = final_target_size - #{ Configuration if not has_perf_mod: _set_cache_ = _set_cache_brute_ @@ -512,13 +510,13 @@ class DeltaApplyReader(LazyMixin): # END single object special handling if stream_list[-1].type_id in delta_types: - raise ValueError("Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type) + raise ValueError( + "Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type) # END check stream return cls(stream_list) #} END interface - #{ OInfo like Interface @property @@ -543,6 +541,7 @@ class DeltaApplyReader(LazyMixin): #{ W Streams class Sha1Writer(object): + """Simple stream writer which produces a sha whenever you like as it degests everything it is supposed to write""" __slots__ = "sha1" @@ -565,7 +564,7 @@ class Sha1Writer(object): #{ Interface - def sha(self, as_hex = False): + def sha(self, as_hex=False): """:return: sha so far :param as_hex: if True, sha will be hex-encoded, binary otherwise""" if as_hex: @@ -576,6 +575,7 @@ class Sha1Writer(object): class FlexibleSha1Writer(Sha1Writer): + """Writer producing a sha1 while passing on the written bytes to the given write function""" __slots__ = 'writer' @@ -590,8 +590,10 @@ class FlexibleSha1Writer(Sha1Writer): class ZippedStoreShaWriter(Sha1Writer): + """Remembers everything someone writes to it and generates a sha""" __slots__ = ('buf', 'zip') + def __init__(self): Sha1Writer.__init__(self) self.buf = BytesIO() @@ -623,6 +625,7 @@ class ZippedStoreShaWriter(Sha1Writer): class FDCompressedSha1Writer(Sha1Writer): + """Digests data written to it, making the sha available, then compress the data and write it to the file descriptor @@ -662,10 +665,12 @@ class FDCompressedSha1Writer(Sha1Writer): class FDStream(object): + """A simple wrapper providing the most basic functions on a file descriptor with the fileobject interface. Cannot use os.fdopen as the resulting stream takes ownership""" __slots__ = ("_fd", '_pos') + def __init__(self, fd): self._fd = fd self._pos = 0 @@ -694,6 +699,7 @@ class FDStream(object): class NullStream(object): + """A stream that does nothing but providing a stream interface. Use it like /dev/null""" __slots__ = tuple() diff --git a/gitdb/test/db/lib.py b/gitdb/test/db/lib.py index af6d9e0..528bcc1 100644 --- a/gitdb/test/db/lib.py +++ b/gitdb/test/db/lib.py @@ -32,7 +32,9 @@ from struct import pack __all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path') + class TestDBBase(TestBase): + """Base class providing testing routines on databases""" # data @@ -65,7 +67,6 @@ class TestDBBase(TestBase): assert len(shas) == db.size() assert len(shas[0]) == 20 - def _assert_object_writing(self, db): """General tests to verify object writing, compatible to ObjectDBW **Note:** requires write access to the database""" @@ -126,4 +127,3 @@ class TestDBBase(TestBase): assert ostream.getvalue() == new_ostream.getvalue() # END for each data set # END for each dry_run mode - diff --git a/gitdb/test/db/test_git.py b/gitdb/test/db/test_git.py index e141c2b..f962067 100644 --- a/gitdb/test/db/test_git.py +++ b/gitdb/test/db/test_git.py @@ -3,7 +3,7 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from gitdb.test.db.lib import ( - TestDBBase, + TestDBBase, fixture_path, with_rw_directory ) @@ -12,6 +12,7 @@ from gitdb.db import GitDB from gitdb.base import OStream, OInfo from gitdb.util import hex_to_bin, bin_to_hex + class TestGitDB(TestDBBase): def test_reading(self): @@ -28,8 +29,7 @@ class TestGitDB(TestDBBase): assert gdb.size() >= ni sha_list = list(gdb.sha_iter()) assert len(sha_list) == gdb.size() - sha_list = sha_list[:ni] # speed up tests ... - + sha_list = sha_list[:ni] # speed up tests ... # This is actually a test for compound functionality, but it doesn't # have a separate test module @@ -39,7 +39,7 @@ class TestGitDB(TestDBBase): # mix even/uneven hexshas for i, binsha in enumerate(sha_list): - assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha + assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8 - (i % 2)]) == binsha # END for each sha self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000") diff --git a/gitdb/test/db/test_loose.py b/gitdb/test/db/test_loose.py index 1d6af9c..024c194 100644 --- a/gitdb/test/db/test_loose.py +++ b/gitdb/test/db/test_loose.py @@ -3,13 +3,14 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from gitdb.test.db.lib import ( - TestDBBase, + TestDBBase, with_rw_directory ) from gitdb.db import LooseObjectDB from gitdb.exc import BadObject from gitdb.util import bin_to_hex + class TestLooseDB(TestDBBase): @with_rw_directory diff --git a/gitdb/test/db/test_mem.py b/gitdb/test/db/test_mem.py index 97f7217..eb563c0 100644 --- a/gitdb/test/db/test_mem.py +++ b/gitdb/test/db/test_mem.py @@ -11,6 +11,7 @@ from gitdb.db import ( LooseObjectDB ) + class TestMemoryDB(TestDBBase): @with_rw_directory diff --git a/gitdb/test/db/test_pack.py b/gitdb/test/db/test_pack.py index 963a71a..a901581 100644 --- a/gitdb/test/db/test_pack.py +++ b/gitdb/test/db/test_pack.py @@ -14,6 +14,7 @@ from gitdb.exc import BadObject, AmbiguousObjectName import os import random + class TestPackDB(TestDBBase): @with_rw_directory @@ -53,7 +54,6 @@ class TestPackDB(TestDBBase): pdb.stream(sha) # END for each sha to query - # test short finding - be a bit more brutal here max_bytes = 19 min_bytes = 2 @@ -61,10 +61,10 @@ class TestPackDB(TestDBBase): for i, sha in enumerate(sha_list): short_sha = sha[:max((i % max_bytes), min_bytes)] try: - assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha + assert pdb.partial_to_complete_sha(short_sha, len(short_sha) * 2) == sha except AmbiguousObjectName: num_ambiguous += 1 - pass # valid, we can have short objects + pass # valid, we can have short objects # END exception handling # END for each sha to find diff --git a/gitdb/test/db/test_ref.py b/gitdb/test/db/test_ref.py index db93082..b774baf 100644 --- a/gitdb/test/db/test_ref.py +++ b/gitdb/test/db/test_ref.py @@ -3,8 +3,8 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from gitdb.test.db.lib import ( - TestDBBase, - with_rw_directory, + TestDBBase, + with_rw_directory, fixture_path ) from gitdb.db import ReferenceDB @@ -16,6 +16,7 @@ from gitdb.util import ( import os + class TestReferenceDB(TestDBBase): def make_alt_file(self, alt_path, alt_list): diff --git a/gitdb/test/lib.py b/gitdb/test/lib.py index d09b1cb..c4acd92 100644 --- a/gitdb/test/lib.py +++ b/gitdb/test/lib.py @@ -24,6 +24,7 @@ from functools import wraps #{ Bases class TestBase(unittest.TestCase): + """Base class for all tests""" @@ -49,6 +50,7 @@ def skip_on_travis_ci(func): def with_rw_directory(func): """Create a temporary directory which can be written to, remove it if the test suceeds, but leave it otherwise to aid additional debugging""" + def wrapper(self): path = tempfile.mktemp(prefix=func.__name__) os.mkdir(path) @@ -78,6 +80,7 @@ def with_rw_directory(func): def with_packs_rw(func): """Function that provides a path into which the packs for testing should be copied. Will pass on the path to the actual function afterwards""" + def wrapper(self, path): src_pack_glob = fixture_path('packs/*') copy_files_globbed(src_pack_glob, path, hard_link_ok=True) @@ -91,12 +94,14 @@ def with_packs_rw(func): #{ Routines + def fixture_path(relapath=''): """:return: absolute path into the fixture directory :param relapath: relative path into the fixtures directory, or '' to obtain the fixture directory itself""" return os.path.join(os.path.dirname(__file__), 'fixtures', relapath) + def copy_files_globbed(source_glob, target_dir, hard_link_ok=False): """Copy all files found according to the given source glob into the target directory :param hard_link_ok: if True, hard links will be created if possible. Otherwise @@ -127,11 +132,13 @@ def make_bytes(size_in_bytes, randomize=False): a = array('i', producer) return a.tostring() + def make_object(type, data): """:return: bytes resembling an uncompressed object""" odata = "blob %i\0" % len(data) return odata.encode("ascii") + data + def make_memory_file(size_in_bytes, randomize=False): """:return: tuple(size_of_stream, stream) :param randomize: try to produce a very random stream""" @@ -142,24 +149,27 @@ def make_memory_file(size_in_bytes, randomize=False): #{ Stream Utilities + class DummyStream(object): - def __init__(self): - self.was_read = False - self.bytes = 0 - self.closed = False - def read(self, size): - self.was_read = True - self.bytes = size + def __init__(self): + self.was_read = False + self.bytes = 0 + self.closed = False - def close(self): - self.closed = True + def read(self, size): + self.was_read = True + self.bytes = size - def _assert(self): - assert self.was_read + def close(self): + self.closed = True + + def _assert(self): + assert self.was_read class DeriveTest(OStream): + def __init__(self, sha, type, size, stream, *args, **kwargs): self.myarg = kwargs.pop('myarg') self.args = args diff --git a/gitdb/test/performance/__init__.py b/gitdb/test/performance/__init__.py index 8b13789..e69de29 100644 --- a/gitdb/test/performance/__init__.py +++ b/gitdb/test/performance/__init__.py @@ -1 +0,0 @@ - diff --git a/gitdb/test/performance/lib.py b/gitdb/test/performance/lib.py index ec45cf3..cbc52bc 100644 --- a/gitdb/test/performance/lib.py +++ b/gitdb/test/performance/lib.py @@ -13,22 +13,17 @@ k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE" #} END invariants - -#{ Base Classes +#{ Base Classes class TestBigRepoR(TestBase): + """TestCase providing access to readonly 'big' repositories using the following member variables: - + * gitrepopath - + * read-only base path of the git source repository, i.e. .../git/.git""" - - #{ Invariants - head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca' - head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5' - #} END invariants - + def setUp(self): try: super(TestBigRepoR, self).setUp() @@ -37,11 +32,12 @@ class TestBigRepoR(TestBase): self.gitrepopath = os.environ.get(k_env_git_repo) if not self.gitrepopath: - logging.info("You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository") + logging.info( + "You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", k_env_git_repo) ospd = os.path.dirname self.gitrepopath = os.path.join(ospd(ospd(ospd(ospd(__file__)))), '.git') # end assure gitrepo is set assert self.gitrepopath.endswith('.git') - + #} END base classes diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py index e460311..bdd2b0a 100644 --- a/gitdb/test/performance/test_pack.py +++ b/gitdb/test/performance/test_pack.py @@ -6,7 +6,7 @@ from __future__ import print_function from gitdb.test.performance.lib import ( - TestBigRepoR + TestBigRepoR ) from gitdb import ( @@ -24,19 +24,20 @@ import sys import os from time import time + class TestPackedDBPerformance(TestBigRepoR): - @skip_on_travis_ci + @skip_on_travis_ci def test_pack_random_access(self): pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) - + # sha lookup st = time() sha_list = list(pdb.sha_iter()) elapsed = time() - st ns = len(sha_list) print("PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed), file=sys.stderr) - + # sha lookup: best-case and worst case access pdb_pack_info = pdb._pack_info # END shuffle shas @@ -45,13 +46,14 @@ class TestPackedDBPerformance(TestBigRepoR): pdb_pack_info(sha) # END for each sha to look up elapsed = time() - st - + # discard cache del(pdb._entities) pdb.entities() - print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr) + print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % + (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr) # END for each random mode - + # query info and streams only max_items = 10000 # can wait longer when testing memory for pdb_fun in (pdb.info, pdb.stream): @@ -59,9 +61,10 @@ class TestPackedDBPerformance(TestBigRepoR): for sha in sha_list[:max_items]: pdb_fun(sha) elapsed = time() - st - print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr) + print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % + (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr) # END for each function - + # retrieve stream and read all max_items = 5000 pdb_stream = pdb.stream @@ -74,8 +77,9 @@ class TestPackedDBPerformance(TestBigRepoR): total_size += stream.size elapsed = time() - st total_kib = total_size / 1000 - print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr) - + print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % + (max_items, total_kib, total_kib / elapsed, elapsed, max_items / elapsed), file=sys.stderr) + @skip_on_travis_ci def test_loose_correctness(self): """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back @@ -89,7 +93,7 @@ class TestPackedDBPerformance(TestBigRepoR): mdb = MemoryDB() for c, sha in enumerate(pdb.sha_iter()): ostream = pdb.stream(sha) - # the issue only showed on larger files which are hardly compressible ... + # the issue only showed on larger files which are hardly compressible ... if ostream.type != str_blob_type: continue istream = IStream(ostream.type, ostream.size, ostream.stream) @@ -101,7 +105,7 @@ class TestPackedDBPerformance(TestBigRepoR): if c and c % 1000 == 0: print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr) mdb._cache.clear() - # end for each sha to copy + # end for each sha to copy @skip_on_travis_ci def test_correctness(self): @@ -124,6 +128,6 @@ class TestPackedDBPerformance(TestBigRepoR): # END for each index # END for each entity elapsed = time() - st - print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed), file=sys.stderr) + print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % + (count, crc, elapsed, count / elapsed), file=sys.stderr) # END for each verify mode - diff --git a/gitdb/test/performance/test_pack_streaming.py b/gitdb/test/performance/test_pack_streaming.py index fe160ea..f805e59 100644 --- a/gitdb/test/performance/test_pack_streaming.py +++ b/gitdb/test/performance/test_pack_streaming.py @@ -6,7 +6,7 @@ from __future__ import print_function from gitdb.test.performance.lib import ( - TestBigRepoR + TestBigRepoR ) from gitdb.db.pack import PackedDB @@ -18,27 +18,29 @@ import os import sys from time import time + class CountedNullStream(NullStream): __slots__ = '_bw' + def __init__(self): self._bw = 0 - + def bytes_written(self): return self._bw - + def write(self, d): self._bw += NullStream.write(self, d) - + class TestPackStreamingPerformance(TestBigRepoR): - + @skip_on_travis_ci def test_pack_writing(self): # see how fast we can write a pack from object streams. # This will not be fast, as we take time for decompressing the streams as well ostream = CountedNullStream() pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) - + ni = 1000 count = 0 st = time() @@ -47,22 +49,23 @@ class TestPackStreamingPerformance(TestBigRepoR): pdb.stream(sha) if count == ni: break - #END gather objects for pack-writing + # END gather objects for pack-writing elapsed = time() - st - print("PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed), file=sys.stderr) - + print("PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % + (ni, elapsed, ni / elapsed), file=sys.stderr) + st = time() PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni) elapsed = time() - st total_kb = ostream.bytes_written() / 1000 - print(sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed), sys.stderr) - - + print(sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % + (total_kb, elapsed, total_kb / elapsed), sys.stderr) + @skip_on_travis_ci def test_stream_reading(self): # raise SkipTest() pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) - + # streaming only, meant for --with-profile runs ni = 5000 count = 0 @@ -78,5 +81,5 @@ class TestPackStreamingPerformance(TestBigRepoR): count += 1 elapsed = time() - st total_kib = total_size / 1000 - print(sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed), sys.stderr) - + print(sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % + (ni, total_kib, total_kib / elapsed, elapsed, ni / elapsed), sys.stderr) diff --git a/gitdb/test/performance/test_stream.py b/gitdb/test/performance/test_stream.py index 84c9dea..bd66b26 100644 --- a/gitdb/test/performance/test_stream.py +++ b/gitdb/test/performance/test_stream.py @@ -35,22 +35,22 @@ def read_chunked_stream(stream): # END read stream loop assert total == stream.size return stream - - + + #} END utilities class TestObjDBPerformance(TestBigRepoR): - - large_data_size_bytes = 1000*1000*50 # some MiB should do it - moderate_data_size_bytes = 1000*1000*1 # just 1 MiB - - @skip_on_travis_ci + + large_data_size_bytes = 1000 * 1000 * 50 # some MiB should do it + moderate_data_size_bytes = 1000 * 1000 * 1 # just 1 MiB + + @skip_on_travis_ci @with_rw_directory def test_large_data_streaming(self, path): ldb = LooseObjectDB(path) string_ios = list() # list of streams we previously created - - # serial mode + + # serial mode for randomize in range(2): desc = (randomize and 'random ') or '' print("Creating %s data ..." % desc, file=sys.stderr) @@ -59,32 +59,32 @@ class TestObjDBPerformance(TestBigRepoR): elapsed = time() - st print("Done (in %f s)" % elapsed, file=sys.stderr) string_ios.append(stream) - - # writing - due to the compression it will seem faster than it is + + # writing - due to the compression it will seem faster than it is st = time() sha = ldb.store(IStream('blob', size, stream)).binsha elapsed_add = time() - st assert ldb.has_object(sha) db_file = ldb.readable_db_object_path(bin_to_hex(sha)) fsize_kib = os.path.getsize(db_file) / 1000 - - + size_kib = size / 1000 - print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add), file=sys.stderr) - + print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % + (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add), file=sys.stderr) + # reading all at once st = time() ostream = ldb.stream(sha) shadata = ostream.read() elapsed_readall = time() - st - + stream.seek(0) assert shadata == stream.getvalue() - print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall), file=sys.stderr) - - + print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % + (size_kib, desc, elapsed_readall, size_kib / elapsed_readall), file=sys.stderr) + # reading in chunks of 1 MiB - cs = 512*1000 + cs = 512 * 1000 chunks = list() st = time() ostream = ldb.stream(sha) @@ -95,13 +95,14 @@ class TestObjDBPerformance(TestBigRepoR): break # END read in chunks elapsed_readchunks = time() - st - + stream.seek(0) assert b''.join(chunks) == stream.getvalue() - + cs_kib = cs / 1000 - print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr) - + print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % + (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr) + # del db file so we keep something to do os.remove(db_file) # END for each randomization factor diff --git a/gitdb/test/test_base.py b/gitdb/test/test_base.py index 578c29f..519cdfd 100644 --- a/gitdb/test/test_base.py +++ b/gitdb/test/test_base.py @@ -4,10 +4,10 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Test for object db""" from gitdb.test.lib import ( - TestBase, - DummyStream, - DeriveTest, - ) + TestBase, + DummyStream, + DeriveTest, +) from gitdb import ( OInfo, @@ -20,11 +20,11 @@ from gitdb import ( ) from gitdb.util import ( NULL_BIN_SHA - ) +) from gitdb.typ import ( str_blob_type - ) +) class TestBaseTypes(TestBase): @@ -54,7 +54,6 @@ class TestBaseTypes(TestBase): assert dpinfo.delta_info == sha assert dpinfo.pack_offset == 0 - # test ostream stream = DummyStream() ostream = OStream(*(info + (stream, ))) @@ -80,7 +79,7 @@ class TestBaseTypes(TestBase): assert stream.bytes == 5 # derive with own args - DeriveTest(sha, str_blob_type, s, stream, 'mine',myarg = 3)._assert() + DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert() # test istream istream = IStream(str_blob_type, s, stream) diff --git a/gitdb/test/test_example.py b/gitdb/test/test_example.py index aa43a09..ed0a885 100644 --- a/gitdb/test/test_example.py +++ b/gitdb/test/test_example.py @@ -4,7 +4,7 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module with examples from the tutorial section of the docs""" from gitdb.test.lib import ( - TestBase, + TestBase, fixture_path ) from gitdb import IStream @@ -12,6 +12,7 @@ from gitdb.db import LooseObjectDB from io import BytesIO + class TestExamples(TestBase): def test_base(self): diff --git a/gitdb/test/test_pack.py b/gitdb/test/test_pack.py index 3ab2fec..ff10572 100644 --- a/gitdb/test/test_pack.py +++ b/gitdb/test/test_pack.py @@ -43,6 +43,7 @@ def bin_sha_from_filename(filename): return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:]) #} END utilities + class TestPack(TestBase): packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67) @@ -50,8 +51,8 @@ class TestPack(TestBase): packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42) packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2]) packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2]) - packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2]) - + packfile_v2_3_ascii = ( + fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2]) def _assert_index_file(self, index, version, size): assert index.packfile_checksum() != index.indexfile_checksum() @@ -74,13 +75,12 @@ class TestPack(TestBase): assert entry[2] == index.crc(oidx) # verify partial sha - for l in (4,8,11,17,20): - assert index.partial_sha_to_index(sha[:l], l*2) == oidx + for l in (4, 8, 11, 17, 20): + assert index.partial_sha_to_index(sha[:l], l * 2) == oidx # END for each object index in indexfile self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2) - def _assert_pack_file(self, pack, version, size): assert pack.version() == 2 assert pack.size() == size @@ -120,7 +120,6 @@ class TestPack(TestBase): dstream.seek(0) assert dstream.read() == data - # read chunks # NOTE: the current implementation is safe, it basically transfers # all calls to the underlying memory map @@ -128,7 +127,6 @@ class TestPack(TestBase): # END for each object assert num_obj == size - def test_pack_index(self): # check version 1 and 2 for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2): @@ -146,9 +144,9 @@ class TestPack(TestBase): @with_rw_directory def test_pack_entity(self, rw_dir): pack_objs = list() - for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1), - (self.packfile_v2_2, self.packindexfile_v2), - (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)): + for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1), + (self.packfile_v2_2, self.packindexfile_v2), + (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)): packfile, version, size = packinfo indexfile, version, size = indexinfo entity = PackEntity(packfile) @@ -193,22 +191,23 @@ class TestPack(TestBase): pack_path = tempfile.mktemp('', "pack", rw_dir) index_path = tempfile.mktemp('', 'index', rw_dir) iteration = 0 + def rewind_streams(): for obj in pack_objs: obj.stream.seek(0) - #END utility - for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)): + # END utility + for ppath, ipath, num_obj in zip((pack_path, ) * 2, (index_path, None), (len(pack_objs), None)): pfile = open(ppath, 'wb') iwrite = None if ipath: ifile = open(ipath, 'wb') iwrite = ifile.write - #END handle ip + # END handle ip # make sure we rewind the streams ... we work on the same objects over and over again if iteration > 0: rewind_streams() - #END rewind streams + # END rewind streams iteration += 1 pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj) @@ -230,8 +229,8 @@ class TestPack(TestBase): assert idx.packfile_checksum() == pack_sha assert idx.indexfile_checksum() == index_sha assert idx.size() == len(pack_objs) - #END verify files exist - #END for each packpath, indexpath pair + # END verify files exist + # END for each packpath, indexpath pair # verify the packs throughly rewind_streams() @@ -242,10 +241,9 @@ class TestPack(TestBase): for use_crc in range(2): assert entity.is_valid_stream(info.binsha, use_crc) # END for each crc mode - #END for each info + # END for each info assert count == len(pack_objs) - def test_pack_64(self): # TODO: hex-edit a pack helping us to verify that we can handle 64 byte offsets # of course without really needing such a huge pack diff --git a/gitdb/test/test_stream.py b/gitdb/test/test_stream.py index 44a557d..9626825 100644 --- a/gitdb/test/test_stream.py +++ b/gitdb/test/test_stream.py @@ -31,10 +31,12 @@ import tempfile import os from io import BytesIO + class TestStream(TestBase): + """Test stream classes""" - data_sizes = (15, 10000, 1000*1024+512) + data_sizes = (15, 10000, 1000 * 1024 + 512) def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None): """Make stream tests - the orig_stream is seekable, allowing it to be @@ -43,13 +45,13 @@ class TestStream(TestBase): :param rewind_stream: function called to rewind the stream to make it ready for reuse""" ns = 10 - assert len(cdata) > ns-1, "Data must be larger than %i, was %i" % (ns, len(cdata)) + assert len(cdata) > ns - 1, "Data must be larger than %i, was %i" % (ns, len(cdata)) # read in small steps ss = len(cdata) // ns for i in range(ns): data = stream.read(ss) - chunk = cdata[i*ss:(i+1)*ss] + chunk = cdata[i * ss:(i + 1) * ss] assert data == chunk # END for each step rest = stream.read() @@ -136,7 +138,7 @@ class TestStream(TestBase): self.failUnlessRaises(OSError, os.close, fd) # read everything back, compare to data we zip - fd = os.open(path, os.O_RDONLY|getattr(os, 'O_BINARY', 0)) + fd = os.open(path, os.O_RDONLY | getattr(os, 'O_BINARY', 0)) written_data = os.read(fd, os.path.getsize(path)) assert len(written_data) == os.path.getsize(path) os.close(fd) @@ -156,7 +158,7 @@ class TestStream(TestBase): data = ostream.read() assert len(data) == ostream.size - # Putting it back in should yield nothing new - after all, we have + # Putting it back in should yield nothing new - after all, we have dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data))) assert dump.hexsha == sha # end for each loose object sha to test diff --git a/gitdb/test/test_util.py b/gitdb/test/test_util.py index e79355a..1dee544 100644 --- a/gitdb/test/test_util.py +++ b/gitdb/test/test_util.py @@ -16,6 +16,7 @@ from gitdb.util import ( class TestUtils(TestBase): + def test_basics(self): assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA assert len(to_bin_sha(NULL_HEX_SHA)) == 20 @@ -73,7 +74,6 @@ class TestUtils(TestBase): del(lfd) assert not os.path.isfile(lockfilepath) - # write data - concurrently lfd = LockedFD(my_file) olfd = LockedFD(my_file) diff --git a/gitdb/typ.py b/gitdb/typ.py index bc7ba58..98d15f3 100644 --- a/gitdb/typ.py +++ b/gitdb/typ.py @@ -4,7 +4,7 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module containing information about types known to the database""" -str_blob_type = b'blob' +str_blob_type = b'blob' str_commit_type = b'commit' -str_tree_type = b'tree' -str_tag_type = b'tag' +str_tree_type = b'tree' +str_tag_type = b'tag' diff --git a/gitdb/util.py b/gitdb/util.py index 93ba7f0..5b451fa 100644 --- a/gitdb/util.py +++ b/gitdb/util.py @@ -11,10 +11,10 @@ import errno from io import StringIO from smmap import ( - StaticWindowMapManager, - SlidingWindowMapManager, - SlidingWindowMapBuffer - ) + StaticWindowMapManager, + SlidingWindowMapManager, + SlidingWindowMapBuffer +) # initialize our global memory manager instance # Use it to free cached (and unused) resources. @@ -22,7 +22,7 @@ if sys.version_info[1] < 6: mman = StaticWindowMapManager() else: mman = SlidingWindowMapManager() -#END handle mman +# END handle mman import hashlib @@ -31,6 +31,7 @@ try: except ImportError: from struct import unpack, calcsize __calcsize_cache = dict() + def unpack_from(fmt, data, offset=0): try: size = __calcsize_cache[fmt] @@ -38,7 +39,7 @@ except ImportError: size = calcsize(fmt) __calcsize_cache[fmt] = size # END exception handling - return unpack(fmt, data[offset : offset + size]) + return unpack(fmt, data[offset: offset + size]) # END own unpack_from implementation @@ -67,8 +68,8 @@ close = os.close fsync = os.fsync # Backwards compatibility imports -from gitdb.const import ( - NULL_BIN_SHA, +from gitdb.const import ( + NULL_BIN_SHA, NULL_HEX_SHA ) @@ -76,7 +77,9 @@ from gitdb.const import ( #{ compatibility stuff ... + class _RandomAccessStringIO(object): + """Wrapper to provide required functionality in case memory maps cannot or may not be used. This is only really required in python 2.4""" __slots__ = '_sio' @@ -96,6 +99,7 @@ class _RandomAccessStringIO(object): def __getslice__(self, start, end): return self.getvalue()[start:end] + def byte_ord(b): """ Return the integer representation of the byte string. This supports Python @@ -110,6 +114,7 @@ def byte_ord(b): #{ Routines + def make_sha(source=''.encode("ascii")): """A python2.4 workaround for the sha/hashlib module fiasco @@ -121,6 +126,7 @@ def make_sha(source=''.encode("ascii")): sha1 = sha.sha(source) return sha1 + def allocate_memory(size): """:return: a file-protocol accessible memory block of the given size""" if size == 0: @@ -134,7 +140,7 @@ def allocate_memory(size): # this of course may fail if the amount of memory is not available in # one chunk - would only be the case in python 2.4, being more likely on # 32 bit systems. - return _RandomAccessStringIO("\0"*size) + return _RandomAccessStringIO("\0" * size) # END handle memory allocation @@ -166,6 +172,7 @@ def file_contents_ro(fd, stream=False, allow_mmap=True): return _RandomAccessStringIO(contents) return contents + def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0): """Get the file contents at filepath as fast as possible @@ -178,25 +185,28 @@ def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0): **Note** for now we don't try to use O_NOATIME directly as the right value needs to be shared per database in fact. It only makes a real difference for loose object databases anyway, and they use it with the help of the ``flags`` parameter""" - fd = os.open(filepath, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags) + fd = os.open(filepath, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags) try: return file_contents_ro(fd, stream, allow_mmap) finally: close(fd) # END assure file is closed + def sliding_ro_buffer(filepath, flags=0): """ :return: a buffer compatible object which uses our mapped memory manager internally ready to read the whole given filepath""" return SlidingWindowMapBuffer(mman.make_cursor(filepath), flags=flags) + def to_hex_sha(sha): """:return: hexified version of sha""" if len(sha) == 40: return sha return bin_to_hex(sha) + def to_bin_sha(sha): if len(sha) == 20: return sha @@ -209,6 +219,7 @@ def to_bin_sha(sha): #{ Utilities class LazyMixin(object): + """ Base class providing an interface to lazily retrieve attribute values upon first access. If slots are used, memory will only be reserved once the attribute @@ -240,6 +251,7 @@ class LazyMixin(object): class LockedFD(object): + """ This class facilitates a safe read and write operation to a file on disk. If we write to 'file', we obtain a lock file at 'file.lock' and write to @@ -290,7 +302,7 @@ class LockedFD(object): # try to open the lock file binary = getattr(os, 'O_BINARY', 0) - lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary + lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary try: fd = os.open(self._lockfilepath(), lockmode, int("600", 8)) if not write: diff --git a/gitdb/utils/compat.py b/gitdb/utils/compat.py index a2640fd..c08cab5 100644 --- a/gitdb/utils/compat.py +++ b/gitdb/utils/compat.py @@ -24,7 +24,7 @@ except NameError: return obj[offset:] else: # return memoryview(obj)[offset:offset+size] - return obj[offset:offset+size] + return obj[offset:offset + size] # end buffer reimplementation memoryview = memoryview diff --git a/gitdb/utils/encoding.py b/gitdb/utils/encoding.py index 2d03ad3..5855062 100644 --- a/gitdb/utils/encoding.py +++ b/gitdb/utils/encoding.py @@ -7,6 +7,7 @@ else: string_types = (basestring, ) text_type = unicode + def force_bytes(data, encoding="ascii"): if isinstance(data, bytes): return data @@ -16,6 +17,7 @@ def force_bytes(data, encoding="ascii"): return data + def force_text(data, encoding="utf-8"): if isinstance(data, text_type): return data @@ -1,70 +1,74 @@ #!/usr/bin/env python -from distutils.core import setup, Extension +from distutils.core import setup, Extension from distutils.command.build_py import build_py from distutils.command.build_ext import build_ext -import os, sys +import os +import sys -# wow, this is a mixed bag ... I am pretty upset about all of this ... +# wow, this is a mixed bag ... I am pretty upset about all of this ... setuptools_build_py_module = None try: - # don't pull it in if we don't have to - if 'setuptools' in sys.modules: - import setuptools.command.build_py as setuptools_build_py_module - from setuptools.command.build_ext import build_ext + # don't pull it in if we don't have to + if 'setuptools' in sys.modules: + import setuptools.command.build_py as setuptools_build_py_module + from setuptools.command.build_ext import build_ext except ImportError: - pass + pass + class build_ext_nofail(build_ext): - """Doesn't fail when build our optional extensions""" - def run(self): - try: - build_ext.run(self) - except Exception: - print("Ignored failure when building extensions, pure python modules will be used instead") - # END ignore errors - + + """Doesn't fail when build our optional extensions""" + + def run(self): + try: + build_ext.run(self) + except Exception: + print("Ignored failure when building extensions, pure python modules will be used instead") + # END ignore errors + def get_data_files(self): - """Can you feel the pain ? So, in python2.5 and python2.4 coming with maya, - the line dealing with the ``plen`` has a bug which causes it to truncate too much. - It is fixed in the system interpreters as they receive patches, and shows how - bad it is if something doesn't have proper unittests. - The code here is a plain copy of the python2.6 version which works for all. - - Generate list of '(package,src_dir,build_dir,filenames)' tuples""" - data = [] - if not self.packages: - return data - - # this one is just for the setup tools ! They don't iniitlialize this variable - # when they should, but do it on demand using this method.Its crazy - if hasattr(self, 'analyze_manifest'): - self.analyze_manifest() - # END handle setuptools ... - - for package in self.packages: - # Locate package source directory - src_dir = self.get_package_dir(package) - - # Compute package build directory - build_dir = os.path.join(*([self.build_lib] + package.split('.'))) - - # Length of path to strip from found files - plen = 0 - if src_dir: - plen = len(src_dir)+1 - - # Strip directory from globbed filenames - filenames = [ - file[plen:] for file in self.find_data_files(package, src_dir) - ] - data.append((package, src_dir, build_dir, filenames)) - return data - + """Can you feel the pain ? So, in python2.5 and python2.4 coming with maya, + the line dealing with the ``plen`` has a bug which causes it to truncate too much. + It is fixed in the system interpreters as they receive patches, and shows how + bad it is if something doesn't have proper unittests. + The code here is a plain copy of the python2.6 version which works for all. + + Generate list of '(package,src_dir,build_dir,filenames)' tuples""" + data = [] + if not self.packages: + return data + + # this one is just for the setup tools ! They don't iniitlialize this variable + # when they should, but do it on demand using this method.Its crazy + if hasattr(self, 'analyze_manifest'): + self.analyze_manifest() + # END handle setuptools ... + + for package in self.packages: + # Locate package source directory + src_dir = self.get_package_dir(package) + + # Compute package build directory + build_dir = os.path.join(*([self.build_lib] + package.split('.'))) + + # Length of path to strip from found files + plen = 0 + if src_dir: + plen = len(src_dir) + 1 + + # Strip directory from globbed filenames + filenames = [ + file[plen:] for file in self.find_data_files(package, src_dir) + ] + data.append((package, src_dir, build_dir, filenames)) + return data + build_py.get_data_files = get_data_files if setuptools_build_py_module: - setuptools_build_py_module.build_py._get_data_files = get_data_files + setuptools_build_py_module.build_py._get_data_files = get_data_files # END apply setuptools patch too # NOTE: This is currently duplicated from the gitdb.__init__ module, as we cannot @@ -76,15 +80,15 @@ __homepage__ = "https://github.com/gitpython-developers/gitdb" version_info = (0, 6, 1) __version__ = '.'.join(str(i) for i in version_info) -setup(cmdclass={'build_ext':build_ext_nofail}, - name = "gitdb", - version = __version__, - description = "Git Object Database", - author = __author__, - author_email = __contact__, - url = __homepage__, - packages = ('gitdb', 'gitdb.db', 'gitdb.utils', 'gitdb.test'), - package_dir = {'gitdb':'gitdb'}, +setup(cmdclass={'build_ext': build_ext_nofail}, + name="gitdb", + version=__version__, + description="Git Object Database", + author=__author__, + author_email=__contact__, + url=__homepage__, + packages=('gitdb', 'gitdb.db', 'gitdb.utils', 'gitdb.test'), + package_dir = {'gitdb': 'gitdb'}, ext_modules=[Extension('gitdb._perf', ['gitdb/_fun.c', 'gitdb/_delta_apply.c'], include_dirs=['gitdb'])], license = "BSD License", zip_safe=False, @@ -94,26 +98,26 @@ setup(cmdclass={'build_ext':build_ext_nofail}, # See https://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[ # Picked from - # http://pypi.python.org/pypi?:action=list_classifiers - #"Development Status :: 1 - Planning", - #"Development Status :: 2 - Pre-Alpha", - #"Development Status :: 3 - Alpha", - # "Development Status :: 4 - Beta", - "Development Status :: 5 - Production/Stable", - #"Development Status :: 6 - Mature", - #"Development Status :: 7 - Inactive", - "Environment :: Console", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Operating System :: OS Independent", - "Operating System :: POSIX", - "Operating System :: Microsoft :: Windows", - "Operating System :: MacOS :: MacOS X", - "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - ],) + # http://pypi.python.org/pypi?:action=list_classifiers + #"Development Status :: 1 - Planning", + #"Development Status :: 2 - Pre-Alpha", + #"Development Status :: 3 - Alpha", + # "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", + #"Development Status :: 6 - Mature", + #"Development Status :: 7 - Inactive", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Operating System :: POSIX", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS :: MacOS X", + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", +],) |
