author     Sebastian Thiel <byronimo@gmail.com>  2015-01-04 11:21:36 +0100
committer  Sebastian Thiel <byronimo@gmail.com>  2015-01-04 11:21:36 +0100
commit     ff7615321ee31d981a171f7677a56a971c554059 (patch)
tree       c7057424c95ed246242bb4589798c2c01e8a5648
parent     8b4939630a0d7362e5a6fbca052922d710a87c7e (diff)
download   gitdb-ff7615321ee31d981a171f7677a56a971c554059.tar.gz
Applied autopep8
autopep8 -v -j 8 --max-line-length 120 --in-place --recursive
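
For reference, the same normalization can also be driven from Python rather than the command line. The snippet below is a minimal sketch, assuming autopep8's documented fix_code() entry point and its options dictionary; it is illustrative only and not part of this commit:

    # Reformat a source string with the same 120-column limit used in the command above.
    # Assumes the autopep8 package is importable; fix_code() returns the fixed source text.
    import autopep8

    messy = "import sys,os\nx=[1 ,2,3 ]\n"
    fixed = autopep8.fix_code(messy, options={"max_line_length": 120})
    print(fixed)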
-rw-r--r--  doc/source/conf.py                              7
-rw-r--r--  gitdb/__init__.py                               6
-rw-r--r--  gitdb/base.py                                   20
-rw-r--r--  gitdb/db/base.py                                12
-rw-r--r--  gitdb/db/git.py                                 5
-rw-r--r--  gitdb/db/loose.py                               8
-rw-r--r--  gitdb/db/mem.py                                 3
-rw-r--r--  gitdb/db/pack.py                                6
-rw-r--r--  gitdb/db/ref.py                                 2
-rw-r--r--  gitdb/exc.py                                    14
m---------  gitdb/ext/smmap                                 0
-rw-r--r--  gitdb/fun.py                                    104
-rw-r--r--  gitdb/pack.py                                   111
-rw-r--r--  gitdb/stream.py                                 52
-rw-r--r--  gitdb/test/db/lib.py                            4
-rw-r--r--  gitdb/test/db/test_git.py                       8
-rw-r--r--  gitdb/test/db/test_loose.py                     3
-rw-r--r--  gitdb/test/db/test_mem.py                       1
-rw-r--r--  gitdb/test/db/test_pack.py                      6
-rw-r--r--  gitdb/test/db/test_ref.py                       5
-rw-r--r--  gitdb/test/lib.py                               32
-rw-r--r--  gitdb/test/performance/__init__.py              1
-rw-r--r--  gitdb/test/performance/lib.py                   20
-rw-r--r--  gitdb/test/performance/test_pack.py             34
-rw-r--r--  gitdb/test/performance/test_pack_streaming.py   33
-rw-r--r--  gitdb/test/performance/test_stream.py           49
-rw-r--r--  gitdb/test/test_base.py                         15
-rw-r--r--  gitdb/test/test_example.py                      3
-rw-r--r--  gitdb/test/test_pack.py                         34
-rw-r--r--  gitdb/test/test_stream.py                       12
-rw-r--r--  gitdb/test/test_util.py                         2
-rw-r--r--  gitdb/typ.py                                    6
-rw-r--r--  gitdb/util.py                                   34
-rw-r--r--  gitdb/utils/compat.py                           2
-rw-r--r--  gitdb/utils/encoding.py                         2
-rwxr-xr-x  setup.py                                        174
36 files changed, 459 insertions, 371 deletions
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 723a345..68d9a3f 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -11,7 +11,8 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
-import sys, os
+import sys
+import os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -171,8 +172,8 @@ htmlhelp_basename = 'GitDBdoc'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
- ('index', 'GitDB.tex', u'GitDB Documentation',
- u'Sebastian Thiel', 'manual'),
+ ('index', 'GitDB.tex', u'GitDB Documentation',
+ u'Sebastian Thiel', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
diff --git a/gitdb/__init__.py b/gitdb/__init__.py
index 2a68940..791a2ef 100644
--- a/gitdb/__init__.py
+++ b/gitdb/__init__.py
@@ -8,6 +8,8 @@ import sys
import os
#{ Initialization
+
+
def _init_externals():
"""Initialize external projects by putting them into the path"""
for module in ('smmap',):
@@ -17,8 +19,8 @@ def _init_externals():
__import__(module)
except ImportError:
raise ImportError("'%s' could not be imported, assure it is located in your PYTHONPATH" % module)
- #END verify import
- #END handel imports
+ # END verify import
+ # END handel imports
#} END initialization
diff --git a/gitdb/base.py b/gitdb/base.py
index a33fb67..5760b8a 100644
--- a/gitdb/base.py
+++ b/gitdb/base.py
@@ -11,12 +11,14 @@ from gitdb.fun import (
)
__all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo',
- 'OStream', 'OPackStream', 'ODeltaPackStream',
- 'IStream', 'InvalidOInfo', 'InvalidOStream' )
+ 'OStream', 'OPackStream', 'ODeltaPackStream',
+ 'IStream', 'InvalidOInfo', 'InvalidOStream')
#{ ODB Bases
+
class OInfo(tuple):
+
"""Carries information about an object in an ODB, provding information
about the binary sha of the object, the type_string as well as the uncompressed size
in bytes.
@@ -62,6 +64,7 @@ class OInfo(tuple):
class OPackInfo(tuple):
+
"""As OInfo, but provides a type_id property to retrieve the numerical type id, and
does not include a sha.
@@ -71,7 +74,7 @@ class OPackInfo(tuple):
__slots__ = tuple()
def __new__(cls, packoffset, type, size):
- return tuple.__new__(cls, (packoffset,type, size))
+ return tuple.__new__(cls, (packoffset, type, size))
def __init__(self, *args):
tuple.__init__(self)
@@ -98,6 +101,7 @@ class OPackInfo(tuple):
class ODeltaPackInfo(OPackInfo):
+
"""Adds delta specific information,
Either the 20 byte sha which points to some object in the database,
or the negative offset from the pack_offset, so that pack_offset - delta_info yields
@@ -115,6 +119,7 @@ class ODeltaPackInfo(OPackInfo):
class OStream(OInfo):
+
"""Base for object streams retrieved from the database, providing additional
information about the stream.
Generally, ODB streams are read-only as objects are immutable"""
@@ -124,7 +129,6 @@ class OStream(OInfo):
"""Helps with the initialization of subclasses"""
return tuple.__new__(cls, (sha, type, size, stream))
-
def __init__(self, *args, **kwargs):
tuple.__init__(self)
@@ -141,6 +145,7 @@ class OStream(OInfo):
class ODeltaStream(OStream):
+
"""Uses size info of its stream, delaying reads"""
def __new__(cls, sha, type, size, stream, *args, **kwargs):
@@ -157,6 +162,7 @@ class ODeltaStream(OStream):
class OPackStream(OPackInfo):
+
"""Next to pack object information, a stream outputting an undeltified base object
is provided"""
__slots__ = tuple()
@@ -176,13 +182,13 @@ class OPackStream(OPackInfo):
class ODeltaPackStream(ODeltaPackInfo):
+
"""Provides a stream outputting the uncompressed offset delta information"""
__slots__ = tuple()
def __new__(cls, packoffset, type, size, delta_info, stream):
return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
-
#{ Stream Reader Interface
def read(self, size=-1):
return self[4].read(size)
@@ -194,6 +200,7 @@ class ODeltaPackStream(ODeltaPackInfo):
class IStream(list):
+
"""Represents an input content stream to be fed into the ODB. It is mutable to allow
the ODB to record information about the operations outcome right in this instance.
@@ -246,7 +253,6 @@ class IStream(list):
binsha = property(_binsha, _set_binsha)
-
def _type(self):
return self[1]
@@ -275,6 +281,7 @@ class IStream(list):
class InvalidOInfo(tuple):
+
"""Carries information about a sha identifying an object which is invalid in
the queried database. The exception attribute provides more information about
the cause of the issue"""
@@ -301,6 +308,7 @@ class InvalidOInfo(tuple):
class InvalidOStream(InvalidOInfo):
+
"""Carries information about an invalid ODB stream"""
__slots__ = tuple()
diff --git a/gitdb/db/base.py b/gitdb/db/base.py
index a670eea..2615b13 100644
--- a/gitdb/db/base.py
+++ b/gitdb/db/base.py
@@ -19,11 +19,11 @@ from itertools import chain
from functools import reduce
-
__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
class ObjectDBR(object):
+
"""Defines an interface for object database lookup.
Objects are identified either by their 20 byte bin sha"""
@@ -61,6 +61,7 @@ class ObjectDBR(object):
class ObjectDBW(object):
+
"""Defines an interface to create objects in the database"""
def __init__(self, *args, **kwargs):
@@ -100,6 +101,7 @@ class ObjectDBW(object):
class FileDBBase(object):
+
"""Provides basic facilities to retrieve files of interest, including
caching facilities to help mapping hexsha's to objects"""
@@ -113,7 +115,6 @@ class FileDBBase(object):
super(FileDBBase, self).__init__()
self._root_path = root_path
-
#{ Interface
def root_path(self):
""":return: path at which this db operates"""
@@ -128,6 +129,7 @@ class FileDBBase(object):
class CachingDB(object):
+
"""A database which uses caches to speed-up access"""
#{ Interface
@@ -143,8 +145,6 @@ class CachingDB(object):
# END interface
-
-
def _databases_recursive(database, output):
"""Fill output list with database from db, in order. Deals with Loose, Packed
and compound databases."""
@@ -159,10 +159,12 @@ def _databases_recursive(database, output):
class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
+
"""A database which delegates calls to sub-databases.
Databases are stored in the lazy-loaded _dbs attribute.
Define _set_cache_ to update it with your databases"""
+
def _set_cache_(self, attr):
if attr == '_dbs':
self._dbs = list()
@@ -207,7 +209,7 @@ class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
def size(self):
""":return: total size of all contained databases"""
- return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
+ return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)
def sha_iter(self):
return chain(*(db.sha_iter() for db in self._dbs))
diff --git a/gitdb/db/git.py b/gitdb/db/git.py
index d22e3f1..a4f6f54 100644
--- a/gitdb/db/git.py
+++ b/gitdb/db/git.py
@@ -20,6 +20,7 @@ __all__ = ('GitDB', )
class GitDB(FileDBBase, ObjectDBW, CompoundDB):
+
"""A git-style object database, which contains all objects in the 'objects'
subdirectory"""
# Configuration
@@ -41,8 +42,8 @@ class GitDB(FileDBBase, ObjectDBW, CompoundDB):
self._dbs = list()
loose_db = None
for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
- (self.loose_dir, self.LooseDBCls),
- (self.alternates_dir, self.ReferenceDBCls)):
+ (self.loose_dir, self.LooseDBCls),
+ (self.alternates_dir, self.ReferenceDBCls)):
path = self.db_path(subpath)
if os.path.exists(path):
self._dbs.append(dbcls(path))
diff --git a/gitdb/db/loose.py b/gitdb/db/loose.py
index 3743026..e924080 100644
--- a/gitdb/db/loose.py
+++ b/gitdb/db/loose.py
@@ -57,10 +57,11 @@ import tempfile
import os
-__all__ = ( 'LooseObjectDB', )
+__all__ = ('LooseObjectDB', )
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
+
"""A database which operates on loose object files"""
# CONFIGURATION
@@ -73,7 +74,6 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
if os.name == 'nt':
new_objects_mode = int("644", 8)
-
def __init__(self, root_path):
super(LooseObjectDB, self).__init__(root_path)
self._hexsha_to_file = dict()
@@ -164,7 +164,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
def stream(self, sha):
m = self._map_loose_object(sha)
- type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
return OStream(sha, type, size, stream)
def has_object(self, sha):
@@ -199,7 +199,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
else:
# write object with header, we have to make a new one
write_object(istream.type, istream.size, istream.read, writer.write,
- chunk_size=self.stream_chunk_size)
+ chunk_size=self.stream_chunk_size)
# END handle direct stream copies
finally:
if tmp_path:
diff --git a/gitdb/db/mem.py b/gitdb/db/mem.py
index 1aa0d51..595dbf4 100644
--- a/gitdb/db/mem.py
+++ b/gitdb/db/mem.py
@@ -28,7 +28,9 @@ from io import BytesIO
__all__ = ("MemoryDB", )
+
class MemoryDB(ObjectDBR, ObjectDBW):
+
"""A memory database stores everything to memory, providing fast IO and object
retrieval. It should be used to buffer results and obtain SHAs before writing
it to the actual physical storage, as it allows to query whether object already
@@ -85,7 +87,6 @@ class MemoryDB(ObjectDBR, ObjectDBW):
except AttributeError:
return self._cache.keys()
-
#{ Interface
def stream_copy(self, sha_iter, odb):
"""Copy the streams as identified by sha's yielded by sha_iter into the given odb
diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py
index eaf431a..6b03d83 100644
--- a/gitdb/db/pack.py
+++ b/gitdb/db/pack.py
@@ -31,6 +31,7 @@ __all__ = ('PackedDB', )
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
+
"""A database operating on a set of object packs"""
# sort the priority list every N queries
@@ -113,7 +114,7 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
def size(self):
sizes = [item[1].index().size() for item in self._entities]
- return reduce(lambda x,y: x+y, sizes, 0)
+ return reduce(lambda x, y: x + y, sizes, 0)
#} END object db read
@@ -127,7 +128,6 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
#} END object db write
-
#{ Interface
def update_cache(self, force=False):
@@ -177,7 +177,7 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
def entities(self):
""":return: list of pack entities operated upon by this database"""
- return [ item[1] for item in self._entities ]
+ return [item[1] for item in self._entities]
def partial_to_complete_sha(self, partial_binsha, canonical_length):
""":return: 20 byte sha as inferred by the given partial binary sha
diff --git a/gitdb/db/ref.py b/gitdb/db/ref.py
index d989126..83a9f61 100644
--- a/gitdb/db/ref.py
+++ b/gitdb/db/ref.py
@@ -8,7 +8,9 @@ from gitdb.db.base import (
__all__ = ('ReferenceDB', )
+
class ReferenceDB(CompoundDB):
+
"""A database consisting of database referred to in a file"""
# Configuration
diff --git a/gitdb/exc.py b/gitdb/exc.py
index 73f84d2..d58442f 100644
--- a/gitdb/exc.py
+++ b/gitdb/exc.py
@@ -5,28 +5,42 @@
"""Module with common exceptions"""
from gitdb.util import to_hex_sha
+
class ODBError(Exception):
+
"""All errors thrown by the object database"""
+
class InvalidDBRoot(ODBError):
+
"""Thrown if an object database cannot be initialized at the given path"""
+
class BadObject(ODBError):
+
"""The object with the given SHA does not exist. Instantiate with the
failed sha"""
def __str__(self):
return "BadObject: %s" % to_hex_sha(self.args[0])
+
class ParseError(ODBError):
+
"""Thrown if the parsing of a file failed due to an invalid format"""
+
class AmbiguousObjectName(ODBError):
+
"""Thrown if a possibly shortened name does not uniquely represent a single object
in the database"""
+
class BadObjectType(ODBError):
+
"""The object had an unsupported type"""
+
class UnsupportedOperation(ODBError):
+
"""Thrown if the given operation cannot be supported by the object database"""
diff --git a/gitdb/ext/smmap b/gitdb/ext/smmap
-Subproject eb40b44ce4a6e646aabf7b7091d876738336c42
+Subproject 84929ed811142e366d6c5916125302c1419acad
diff --git a/gitdb/fun.py b/gitdb/fun.py
index b7662b4..17da4e5 100644
--- a/gitdb/fun.py
+++ b/gitdb/fun.py
@@ -31,15 +31,15 @@ OFS_DELTA = 6
REF_DELTA = 7
delta_types = (OFS_DELTA, REF_DELTA)
-type_id_to_type_map = {
- 0 : b'', # EXT 1
- 1 : str_commit_type,
- 2 : str_tree_type,
- 3 : str_blob_type,
- 4 : str_tag_type,
- 5 : b'', # EXT 2
- OFS_DELTA : "OFS_DELTA", # OFFSET DELTA
- REF_DELTA : "REF_DELTA" # REFERENCE DELTA
+type_id_to_type_map = {
+ 0: b'', # EXT 1
+ 1: str_commit_type,
+ 2: str_tree_type,
+ 3: str_blob_type,
+ 4: str_tag_type,
+ 5: b'', # EXT 2
+ OFS_DELTA: "OFS_DELTA", # OFFSET DELTA
+ REF_DELTA: "REF_DELTA" # REFERENCE DELTA
}
type_to_type_id_map = {
@@ -55,8 +55,8 @@ type_to_type_id_map = {
chunk_size = 1000 * mmap.PAGESIZE
__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info',
- 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
- 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header')
+ 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
+ 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header')
#{ Structures
@@ -72,6 +72,7 @@ def _set_delta_rbound(d, size):
# MUST NOT DO THIS HERE
return d
+
def _move_delta_lbound(d, bytes):
"""Move the delta by the given amount of bytes, reducing its size so that its
right bound stays static
@@ -89,9 +90,11 @@ def _move_delta_lbound(d, bytes):
return d
+
def delta_duplicate(src):
return DeltaChunk(src.to, src.ts, src.so, src.data)
+
def delta_chunk_apply(dc, bbuf, write):
"""Apply own data to the target buffer
:param bbuf: buffer providing source bytes for copy operations
@@ -112,15 +115,16 @@ def delta_chunk_apply(dc, bbuf, write):
class DeltaChunk(object):
+
"""Represents a piece of a delta, it can either add new data, or copy existing
one from a source buffer"""
__slots__ = (
- 'to', # start offset in the target buffer in bytes
+ 'to', # start offset in the target buffer in bytes
'ts', # size of this chunk in the target buffer in bytes
'so', # start offset in the source buffer in bytes or None
'data', # chunk of bytes to be added to the target buffer,
# DeltaChunkList to use as base, or None
- )
+ )
def __init__(self, to, ts, so, data):
self.to = to
@@ -142,6 +146,7 @@ class DeltaChunk(object):
#} END interface
+
def _closest_index(dcl, absofs):
""":return: index at which the given absofs should be inserted. The index points
to the DeltaChunk with a target buffer absofs that equals or is greater than
@@ -160,7 +165,8 @@ def _closest_index(dcl, absofs):
lo = mid + 1
# END handle bound
# END for each delta absofs
- return len(dcl)-1
+ return len(dcl) - 1
+
def delta_list_apply(dcl, bbuf, write):
"""Apply the chain's changes and write the final result using the passed
@@ -173,6 +179,7 @@ def delta_list_apply(dcl, bbuf, write):
delta_chunk_apply(dc, bbuf, write)
# END for each dc
+
def delta_list_slice(dcl, absofs, size, ndcl):
""":return: Subsection of this list at the given absolute offset, with the given
size in bytes.
@@ -209,6 +216,7 @@ def delta_list_slice(dcl, absofs, size, ndcl):
class DeltaChunkList(list):
+
"""List with special functionality to deal with DeltaChunks.
There are two types of lists we represent. The one was created bottom-up, working
towards the latest delta, the other kind was created top-down, working from the
@@ -252,16 +260,16 @@ class DeltaChunkList(list):
dc = self[i]
i += 1
if dc.data is None:
- if first_data_index is not None and i-2-first_data_index > 1:
- #if first_data_index is not None:
+ if first_data_index is not None and i - 2 - first_data_index > 1:
+ # if first_data_index is not None:
nd = StringIO() # new data
so = self[first_data_index].to # start offset in target buffer
- for x in xrange(first_data_index, i-1):
+ for x in xrange(first_data_index, i - 1):
xdc = self[x]
nd.write(xdc.data[:xdc.ts])
# END collect data
- del(self[first_data_index:i-1])
+ del(self[first_data_index:i - 1])
buf = nd.getvalue()
self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf))
@@ -274,10 +282,10 @@ class DeltaChunkList(list):
# END skip non-data chunks
if first_data_index is None:
- first_data_index = i-1
+ first_data_index = i - 1
# END iterate list
- #if slen_orig != len(self):
+ # if slen_orig != len(self):
# print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
return self
@@ -288,7 +296,7 @@ class DeltaChunkList(list):
:raise AssertionError: if the size doen't match"""
if target_size > -1:
assert self[-1].rbound() == target_size
- assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size
+ assert reduce(lambda x, y: x + y, (d.ts for d in self), 0) == target_size
# END target size verification
if len(self) < 2:
@@ -301,18 +309,19 @@ class DeltaChunkList(list):
assert len(dc.data) >= dc.ts
# END for each dc
- left = islice(self, 0, len(self)-1)
+ left = islice(self, 0, len(self) - 1)
right = iter(self)
right.next()
# this is very pythonic - we might have just use index based access here,
# but this could actually be faster
- for lft,rgt in izip(left, right):
+ for lft, rgt in izip(left, right):
assert lft.rbound() == rgt.to
assert lft.to + lft.ts == rgt.to
# END for each pair
class TopdownDeltaChunkList(DeltaChunkList):
+
"""Represents a list which is generated by feeding its ancestor streams one by
one"""
__slots__ = tuple()
@@ -356,19 +365,19 @@ class TopdownDeltaChunkList(DeltaChunkList):
# END update target bounds
if len(ccl) == 1:
- self[dci-1] = ccl[0]
+ self[dci - 1] = ccl[0]
else:
# maybe try to compute the expenses here, and pick the right algorithm
# It would normally be faster than copying everything physically though
# TODO: Use a deque here, and decide by the index whether to extend
# or extend left !
post_dci = self[dci:]
- del(self[dci-1:]) # include deletion of dc
+ del(self[dci - 1:]) # include deletion of dc
self.extend(ccl)
self.extend(post_dci)
slen = len(self)
- dci += len(ccl)-1 # deleted dc, added rest
+ dci += len(ccl) - 1 # deleted dc, added rest
# END handle chunk replacement
# END for each chunk
@@ -391,6 +400,7 @@ def is_loose_object(m):
word = (b0 << 8) + b1
return b0 == 0x78 and (word % 31) == 0
+
def loose_object_header_info(m):
"""
:return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
@@ -402,6 +412,7 @@ def loose_object_header_info(m):
return type_name, int(size)
+
def pack_object_header_info(data):
"""
:return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
@@ -430,6 +441,7 @@ def pack_object_header_info(data):
# end performance at expense of maintenance ...
return (type_id, size, i)
+
def create_pack_object_header(obj_type, obj_size):
"""
:return: string defining the pack header comprised of the object type
@@ -439,7 +451,7 @@ def create_pack_object_header(obj_type, obj_size):
:param obj_size: uncompressed size in bytes of the following object stream"""
c = 0 # 1 byte
if PY3:
- hdr = bytearray() # output string
+ hdr = bytearray() # output string
c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
@@ -447,10 +459,10 @@ def create_pack_object_header(obj_type, obj_size):
hdr.append(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
- #END until size is consumed
+ # END until size is consumed
hdr.append(c)
else:
- hdr = bytes() # output string
+ hdr = bytes() # output string
c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
@@ -458,11 +470,12 @@ def create_pack_object_header(obj_type, obj_size):
hdr += chr(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
- #END until size is consumed
+ # END until size is consumed
hdr += chr(c)
# end handle interpreter
return hdr
+
def msb_size(data, offset=0):
"""
:return: tuple(read_bytes, size) read the msb size from the given random
@@ -473,8 +486,8 @@ def msb_size(data, offset=0):
hit_msb = False
if PY3:
while i < l:
- c = data[i+offset]
- size |= (c & 0x7f) << i*7
+ c = data[i + offset]
+ size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
@@ -483,8 +496,8 @@ def msb_size(data, offset=0):
# END while in range
else:
while i < l:
- c = ord(data[i+offset])
- size |= (c & 0x7f) << i*7
+ c = ord(data[i + offset])
+ size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
@@ -494,7 +507,8 @@ def msb_size(data, offset=0):
# end performance ...
if not hit_msb:
raise AssertionError("Could not find terminating MSB byte in data stream")
- return i+offset, size
+ return i + offset, size
+
def loose_object_header(type, size):
"""
@@ -502,6 +516,7 @@ def loose_object_header(type, size):
followed by the content stream of size 'size'"""
return ('%s %i\0' % (force_text(type), size)).encode('ascii')
+
def write_object(type, size, read, write, chunk_size=chunk_size):
"""
Write the object as identified by type, size and source_stream into the
@@ -522,6 +537,7 @@ def write_object(type, size, read, write, chunk_size=chunk_size):
return tbw
+
def stream_copy(read, write, size, chunk_size):
"""
Copy a stream up to size bytes using the provided read and write methods,
@@ -532,7 +548,7 @@ def stream_copy(read, write, size, chunk_size):
# WRITE ALL DATA UP TO SIZE
while True:
- cs = min(chunk_size, size-dbw)
+ cs = min(chunk_size, size - dbw)
# NOTE: not all write methods return the amount of written bytes, like
# mmap.write. Its bad, but we just deal with it ... perhaps its not
# even less efficient
@@ -548,6 +564,7 @@ def stream_copy(read, write, size, chunk_size):
# END duplicate data
return dbw
+
def connect_deltas(dstreams):
"""
Read the condensed delta chunk information from dstream and merge its information
@@ -602,7 +619,7 @@ def connect_deltas(dstreams):
rbound = cp_off + cp_size
if (rbound < cp_size or
- rbound > base_size):
+ rbound > base_size):
break
dcl.append(DeltaChunk(tbw, cp_size, cp_off, None))
@@ -610,7 +627,7 @@ def connect_deltas(dstreams):
elif c:
# NOTE: in C, the data chunks should probably be concatenated here.
# In python, we do it as a post-process
- dcl.append(DeltaChunk(tbw, c, 0, db[i:i+c]))
+ dcl.append(DeltaChunk(tbw, c, 0, db[i:i + c]))
i += c
tbw += c
else:
@@ -632,6 +649,7 @@ def connect_deltas(dstreams):
return tdcl
+
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
"""
Apply data from a delta buffer using a source buffer to the target file
@@ -678,11 +696,11 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
rbound = cp_off + cp_size
if (rbound < cp_size or
- rbound > src_buf_size):
+ rbound > src_buf_size):
break
write(buffer(src_buf, cp_off, cp_size))
elif c:
- write(db[i:i+c])
+ write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
@@ -721,11 +739,11 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
rbound = cp_off + cp_size
if (rbound < cp_size or
- rbound > src_buf_size):
+ rbound > src_buf_size):
break
write(buffer(src_buf, cp_off, cp_size))
elif c:
- write(db[i:i+c])
+ write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
@@ -749,7 +767,7 @@ def is_equal_canonical_sha(canonical_length, match, sha1):
return False
if canonical_length - binary_length and \
- (byte_ord(match[-1]) ^ byte_ord(sha1[len(match)-1])) & 0xf0:
+ (byte_ord(match[-1]) ^ byte_ord(sha1[len(match) - 1])) & 0xf0:
return False
# END handle uneven canonnical length
return True
diff --git a/gitdb/pack.py b/gitdb/pack.py
index 375cc59..b4ba787 100644
--- a/gitdb/pack.py
+++ b/gitdb/pack.py
@@ -72,8 +72,6 @@ import sys
__all__ = ('PackIndexFile', 'PackFile', 'PackEntity')
-
-
#{ Utilities
def pack_object_at(cursor, offset, as_stream):
@@ -107,7 +105,7 @@ def pack_object_at(cursor, offset, as_stream):
total_rela_offset = i
# REF DELTA
elif type_id == REF_DELTA:
- total_rela_offset = data_rela_offset+20
+ total_rela_offset = data_rela_offset + 20
delta_info = data[data_rela_offset:total_rela_offset]
# BASE OBJECT
else:
@@ -129,6 +127,7 @@ def pack_object_at(cursor, offset, as_stream):
# END handle info
# END handle stream
+
def write_stream_to_pack(read, write, zstream, base_crc=None):
"""Copy a stream as read from read function, zip it, and write the result.
Count the number of written bytes and return it
@@ -142,7 +141,7 @@ def write_stream_to_pack(read, write, zstream, base_crc=None):
crc = 0
if want_crc:
crc = base_crc
- #END initialize crc
+ # END initialize crc
while True:
chunk = read(chunk_size)
@@ -153,18 +152,18 @@ def write_stream_to_pack(read, write, zstream, base_crc=None):
if want_crc:
crc = crc32(compressed, crc)
- #END handle crc
+ # END handle crc
if len(chunk) != chunk_size:
break
- #END copy loop
+ # END copy loop
compressed = zstream.flush()
bw += len(compressed)
write(compressed)
if want_crc:
crc = crc32(compressed, crc)
- #END handle crc
+ # END handle crc
return (br, bw, crc)
@@ -173,6 +172,7 @@ def write_stream_to_pack(read, write, zstream, base_crc=None):
class IndexWriter(object):
+
"""Utility to cache index information, allowing to write all information later
in one go to the given stream
**Note:** currently only writes v2 indices"""
@@ -198,15 +198,15 @@ class IndexWriter(object):
sha_write(pack(">L", PackIndexFile.index_version_default))
# fanout
- tmplist = list((0,)*256) # fanout or list with 64 bit offsets
+ tmplist = list((0,) * 256) # fanout or list with 64 bit offsets
for t in self._objs:
tmplist[byte_ord(t[0][0])] += 1
- #END prepare fanout
+ # END prepare fanout
for i in xrange(255):
v = tmplist[i]
sha_write(pack('>L', v))
- tmplist[i+1] += v
- #END write each fanout entry
+ tmplist[i + 1] += v
+ # END write each fanout entry
sha_write(pack('>L', tmplist[255]))
# sha1 ordered
@@ -215,8 +215,8 @@ class IndexWriter(object):
# crc32
for t in self._objs:
- sha_write(pack('>L', t[1]&0xffffffff))
- #END for each crc
+ sha_write(pack('>L', t[1] & 0xffffffff))
+ # END for each crc
tmplist = list()
# offset 32
@@ -224,15 +224,15 @@ class IndexWriter(object):
ofs = t[2]
if ofs > 0x7fffffff:
tmplist.append(ofs)
- ofs = 0x80000000 + len(tmplist)-1
- #END hande 64 bit offsets
- sha_write(pack('>L', ofs&0xffffffff))
- #END for each offset
+ ofs = 0x80000000 + len(tmplist) - 1
+ # END hande 64 bit offsets
+ sha_write(pack('>L', ofs & 0xffffffff))
+ # END for each offset
# offset 64
for ofs in tmplist:
sha_write(pack(">Q", ofs))
- #END for each offset
+ # END for each offset
# trailer
assert(len(pack_sha) == 20)
@@ -242,8 +242,8 @@ class IndexWriter(object):
return sha
-
class PackIndexFile(LazyMixin):
+
"""A pack index provides offsets into the corresponding pack, allowing to find
locations for offsets faster."""
@@ -273,8 +273,9 @@ class PackIndexFile(LazyMixin):
self._cursor = mman.make_cursor(self._indexpath).use_region()
# We will assume that the index will always fully fit into memory !
if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size():
- raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (self._indexpath, self._cursor.file_size(), mman.window_size()))
- #END assert window size
+ raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (
+ self._indexpath, self._cursor.file_size(), mman.window_size()))
+ # END assert window size
else:
# now its time to initialize everything - if we are here, someone wants
# to access the fanout table or related properties
@@ -293,27 +294,25 @@ class PackIndexFile(LazyMixin):
setattr(self, fname, getattr(self, "_%s_v%i" % (fname, self._version)))
# END for each function to initialize
-
# INITIALIZE DATA
# byte offset is 8 if version is 2, 0 otherwise
self._initialize()
# END handle attributes
-
#{ Access V1
def _entry_v1(self, i):
""":return: tuple(offset, binsha, 0)"""
- return unpack_from(">L20s", self._cursor.map(), 1024 + i*24) + (0, )
+ return unpack_from(">L20s", self._cursor.map(), 1024 + i * 24) + (0, )
def _offset_v1(self, i):
"""see ``_offset_v2``"""
- return unpack_from(">L", self._cursor.map(), 1024 + i*24)[0]
+ return unpack_from(">L", self._cursor.map(), 1024 + i * 24)[0]
def _sha_v1(self, i):
"""see ``_sha_v2``"""
- base = 1024 + (i*24)+4
- return self._cursor.map()[base:base+20]
+ base = 1024 + (i * 24) + 4
+ return self._cursor.map()[base:base + 20]
def _crc_v1(self, i):
"""unsupported"""
@@ -343,7 +342,7 @@ class PackIndexFile(LazyMixin):
def _sha_v2(self, i):
""":return: sha at the given index of this file index instance"""
base = self._sha_list_offset + i * 20
- return self._cursor.map()[base:base+20]
+ return self._cursor.map()[base:base + 20]
def _crc_v2(self, i):
""":return: 4 bytes crc for the object at index i"""
@@ -369,7 +368,7 @@ class PackIndexFile(LazyMixin):
out = list()
append = out.append
for i in xrange(256):
- append(unpack_from('>L', d, byte_offset + i*4)[0])
+ append(unpack_from('>L', d, byte_offset + i * 4)[0])
# END for each entry
return out
@@ -421,7 +420,7 @@ class PackIndexFile(LazyMixin):
get_sha = self.sha
lo = 0 # lower index, the left bound of the bisection
if first_byte != 0:
- lo = self._fanout_table[first_byte-1]
+ lo = self._fanout_table[first_byte - 1]
hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
# bisect until we have the sha
@@ -455,7 +454,7 @@ class PackIndexFile(LazyMixin):
get_sha = self.sha
lo = 0 # lower index, the left bound of the bisection
if first_byte != 0:
- lo = self._fanout_table[first_byte-1]
+ lo = self._fanout_table[first_byte - 1]
hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
# fill the partial to full 20 bytes
@@ -481,7 +480,7 @@ class PackIndexFile(LazyMixin):
if is_equal_canonical_sha(canonical_length, partial_bin_sha, cur_sha):
next_sha = None
if lo + 1 < self.size():
- next_sha = get_sha(lo+1)
+ next_sha = get_sha(lo + 1)
if next_sha and next_sha == cur_sha:
raise AmbiguousObjectName(partial_bin_sha)
return lo
@@ -500,6 +499,7 @@ class PackIndexFile(LazyMixin):
class PackFile(LazyMixin):
+
"""A pack is a file written according to the Version 2 for git packs
As we currently use memory maps, it could be assumed that the maximum size of
@@ -516,7 +516,7 @@ class PackFile(LazyMixin):
pack_version_default = 2
# offset into our data at which the first object starts
- first_object_offset = 3*4 # header bytes
+ first_object_offset = 3 * 4 # header bytes
footer_size = 20 # final sha
def __init__(self, packpath):
@@ -549,7 +549,6 @@ class PackFile(LazyMixin):
stream_copy(ostream.read, null.write, ostream.size, chunk_size)
cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read()
-
# if a stream is requested, reset it beforehand
# Otherwise return the Stream object directly, its derived from the
# info object
@@ -578,7 +577,7 @@ class PackFile(LazyMixin):
def checksum(self):
""":return: 20 byte sha1 hash on all object sha's contained in this file"""
- return self._cursor.use_region(self._cursor.file_size()-20).buffer()[:]
+ return self._cursor.use_region(self._cursor.file_size() - 20).buffer()[:]
def path(self):
""":return: path to the packfile"""
@@ -645,13 +644,14 @@ class PackFile(LazyMixin):
class PackEntity(LazyMixin):
+
"""Combines the PackIndexFile and the PackFile into one, allowing the
actual objects to be resolved and iterated"""
- __slots__ = ( '_index', # our index file
- '_pack', # our pack file
- '_offset_map' # on demand dict mapping one offset to the next consecutive one
- )
+ __slots__ = ('_index', # our index file
+ '_pack', # our pack file
+ '_offset_map' # on demand dict mapping one offset to the next consecutive one
+ )
IndexFileCls = PackIndexFile
PackFileCls = PackFile
@@ -673,7 +673,7 @@ class PackEntity(LazyMixin):
offset_map = None
if len(offsets_sorted) == 1:
- offset_map = { offsets_sorted[0] : last_offset }
+ offset_map = {offsets_sorted[0]: last_offset}
else:
iter_offsets = iter(offsets_sorted)
iter_offsets_plus_one = iter(offsets_sorted)
@@ -895,10 +895,9 @@ class PackEntity(LazyMixin):
:raise BadObject:"""
return self.collect_streams_at_offset(self._index.offset(self._sha_to_index(sha)))
-
@classmethod
def write_pack(cls, object_iter, pack_write, index_write=None,
- object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
+ object_count=None, zlib_compression=zlib.Z_BEST_SPEED):
"""
Create a new pack by putting all objects obtained by the object_iterator
into a pack which is written using the pack_write method.
@@ -923,9 +922,9 @@ class PackEntity(LazyMixin):
if not object_count:
if not isinstance(object_iter, (tuple, list)):
objs = list(object_iter)
- #END handle list type
+ # END handle list type
object_count = len(objs)
- #END handle object
+ # END handle object
pack_writer = FlexibleSha1Writer(pack_write)
pwrite = pack_writer.write
@@ -939,7 +938,7 @@ class PackEntity(LazyMixin):
if wants_index:
index = IndexWriter()
- #END handle index header
+ # END handle index header
actual_count = 0
for obj in objs:
@@ -952,30 +951,31 @@ class PackEntity(LazyMixin):
crc = crc32(hdr)
else:
crc = None
- #END handle crc
+ # END handle crc
pwrite(hdr)
# data stream
zstream = zlib.compressobj(zlib_compression)
ostream = obj.stream
- br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc = crc)
+ br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc=crc)
assert(br == obj.size)
if wants_index:
index.append(obj.binsha, crc, ofs)
- #END handle index
+ # END handle index
ofs += len(hdr) + bw
if actual_count == object_count:
break
- #END abort once we are done
- #END for each object
+ # END abort once we are done
+ # END for each object
if actual_count != object_count:
- raise ValueError("Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count))
- #END count assertion
+ raise ValueError(
+ "Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count))
+ # END count assertion
# write footer
- pack_sha = pack_writer.sha(as_hex = False)
+ pack_sha = pack_writer.sha(as_hex=False)
assert len(pack_sha) == 20
pack_write(pack_sha)
ofs += len(pack_sha) # just for completeness ;)
@@ -983,12 +983,12 @@ class PackEntity(LazyMixin):
index_sha = None
if wants_index:
index_sha = index.write(pack_sha, index_write)
- #END handle index
+ # END handle index
return pack_sha, index_sha
@classmethod
- def create(cls, object_iter, base_dir, object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
+ def create(cls, object_iter, base_dir, object_count=None, zlib_compression=zlib.Z_BEST_SPEED):
"""Create a new on-disk entity comprised of a properly named pack file and a properly named
and corresponding index file. The pack contains all OStream objects contained in object iter.
:param base_dir: directory which is to contain the files
@@ -1012,5 +1012,4 @@ class PackEntity(LazyMixin):
return cls(new_pack_path)
-
#} END interface
diff --git a/gitdb/stream.py b/gitdb/stream.py
index b0a8900..4478a0f 100644
--- a/gitdb/stream.py
+++ b/gitdb/stream.py
@@ -38,14 +38,15 @@ try:
except ImportError:
pass
-__all__ = ( 'DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader',
- 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', 'FDCompressedSha1Writer',
- 'FDStream', 'NullStream')
+__all__ = ('DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader',
+ 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', 'FDCompressedSha1Writer',
+ 'FDStream', 'NullStream')
#{ RO Streams
class DecompressMemMapReader(LazyMixin):
+
"""Reads data in chunks from a memory map and decompresses it. The client sees
only the uncompressed data, respective file-like read calls are handling on-demand
buffered decompression accordingly
@@ -63,9 +64,9 @@ class DecompressMemMapReader(LazyMixin):
to better support streamed reading - it would only need to keep the mmap
and decompress it into chunks, thats all ... """
__slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close',
- '_cbr', '_phi')
+ '_cbr', '_phi')
- max_read_size = 512*1024 # currently unused
+ max_read_size = 512 * 1024 # currently unused
def __init__(self, m, close_on_deletion, size=None):
"""Initialize with mmap for stream reading
@@ -214,7 +215,6 @@ class DecompressMemMapReader(LazyMixin):
return bytes()
# END handle depletion
-
# deplete the buffer, then just continue using the decompress object
# which has an own buffer. We just need this to transparently parse the
# header from the zlib stream
@@ -263,7 +263,6 @@ class DecompressMemMapReader(LazyMixin):
self._cwe = cws + size
# END handle tail
-
# if window is too small, make it larger so zip can decompress something
if self._cwe - self._cws < 8:
self._cwe = self._cws + 8
@@ -285,7 +284,7 @@ class DecompressMemMapReader(LazyMixin):
unused_datalen = len(self._zip.unconsumed_tail)
else:
unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
- # end handle very special case ...
+ # end handle very special case ...
self._cbr += len(indata) - unused_datalen
self._br += len(dcompdat)
@@ -301,12 +300,13 @@ class DecompressMemMapReader(LazyMixin):
# to read, if we are called by compressed_bytes_read - it manipulates
# us to empty the stream
if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s:
- dcompdat += self.read(size-len(dcompdat))
+ dcompdat += self.read(size - len(dcompdat))
# END handle special case
return dcompdat
class DeltaApplyReader(LazyMixin):
+
"""A reader which dynamically applies pack deltas to a base object, keeping the
memory demands to a minimum.
@@ -332,15 +332,15 @@ class DeltaApplyReader(LazyMixin):
* cmd == 0 - invalid operation ( or error in delta stream )
"""
__slots__ = (
- "_bstream", # base stream to which to apply the deltas
- "_dstreams", # tuple of delta stream readers
- "_mm_target", # memory map of the delta-applied data
- "_size", # actual number of bytes in _mm_target
- "_br" # number of bytes read
- )
+ "_bstream", # base stream to which to apply the deltas
+ "_dstreams", # tuple of delta stream readers
+ "_mm_target", # memory map of the delta-applied data
+ "_size", # actual number of bytes in _mm_target
+ "_br" # number of bytes read
+ )
#{ Configuration
- k_max_memory_move = 250*1000*1000
+ k_max_memory_move = 250 * 1000 * 1000
#} END configuration
def __init__(self, stream_list):
@@ -414,7 +414,6 @@ class DeltaApplyReader(LazyMixin):
base_size = target_size = max(base_size, max_target_size)
# END adjust buffer sizes
-
# Allocate private memory map big enough to hold the first base buffer
# We need random access to it
bbuf = allocate_memory(base_size)
@@ -440,11 +439,11 @@ class DeltaApplyReader(LazyMixin):
ddata = allocate_memory(dstream.size - offset)
ddata.write(dbuf)
# read the rest from the stream. The size we give is larger than necessary
- stream_copy(dstream.read, ddata.write, dstream.size, 256*mmap.PAGESIZE)
+ stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)
#######################################################################
if 'c_apply_delta' in globals():
- c_apply_delta(bbuf, ddata, tbuf);
+ c_apply_delta(bbuf, ddata, tbuf)
else:
apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
#######################################################################
@@ -463,7 +462,6 @@ class DeltaApplyReader(LazyMixin):
self._mm_target = bbuf
self._size = final_target_size
-
#{ Configuration
if not has_perf_mod:
_set_cache_ = _set_cache_brute_
@@ -512,13 +510,13 @@ class DeltaApplyReader(LazyMixin):
# END single object special handling
if stream_list[-1].type_id in delta_types:
- raise ValueError("Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type)
+ raise ValueError(
+ "Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type)
# END check stream
return cls(stream_list)
#} END interface
-
#{ OInfo like Interface
@property
@@ -543,6 +541,7 @@ class DeltaApplyReader(LazyMixin):
#{ W Streams
class Sha1Writer(object):
+
"""Simple stream writer which produces a sha whenever you like as it degests
everything it is supposed to write"""
__slots__ = "sha1"
@@ -565,7 +564,7 @@ class Sha1Writer(object):
#{ Interface
- def sha(self, as_hex = False):
+ def sha(self, as_hex=False):
""":return: sha so far
:param as_hex: if True, sha will be hex-encoded, binary otherwise"""
if as_hex:
@@ -576,6 +575,7 @@ class Sha1Writer(object):
class FlexibleSha1Writer(Sha1Writer):
+
"""Writer producing a sha1 while passing on the written bytes to the given
write function"""
__slots__ = 'writer'
@@ -590,8 +590,10 @@ class FlexibleSha1Writer(Sha1Writer):
class ZippedStoreShaWriter(Sha1Writer):
+
"""Remembers everything someone writes to it and generates a sha"""
__slots__ = ('buf', 'zip')
+
def __init__(self):
Sha1Writer.__init__(self)
self.buf = BytesIO()
@@ -623,6 +625,7 @@ class ZippedStoreShaWriter(Sha1Writer):
class FDCompressedSha1Writer(Sha1Writer):
+
"""Digests data written to it, making the sha available, then compress the
data and write it to the file descriptor
@@ -662,10 +665,12 @@ class FDCompressedSha1Writer(Sha1Writer):
class FDStream(object):
+
"""A simple wrapper providing the most basic functions on a file descriptor
with the fileobject interface. Cannot use os.fdopen as the resulting stream
takes ownership"""
__slots__ = ("_fd", '_pos')
+
def __init__(self, fd):
self._fd = fd
self._pos = 0
@@ -694,6 +699,7 @@ class FDStream(object):
class NullStream(object):
+
"""A stream that does nothing but providing a stream interface.
Use it like /dev/null"""
__slots__ = tuple()
diff --git a/gitdb/test/db/lib.py b/gitdb/test/db/lib.py
index af6d9e0..528bcc1 100644
--- a/gitdb/test/db/lib.py
+++ b/gitdb/test/db/lib.py
@@ -32,7 +32,9 @@ from struct import pack
__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path')
+
class TestDBBase(TestBase):
+
"""Base class providing testing routines on databases"""
# data
@@ -65,7 +67,6 @@ class TestDBBase(TestBase):
assert len(shas) == db.size()
assert len(shas[0]) == 20
-
def _assert_object_writing(self, db):
"""General tests to verify object writing, compatible to ObjectDBW
**Note:** requires write access to the database"""
@@ -126,4 +127,3 @@ class TestDBBase(TestBase):
assert ostream.getvalue() == new_ostream.getvalue()
# END for each data set
# END for each dry_run mode
-
diff --git a/gitdb/test/db/test_git.py b/gitdb/test/db/test_git.py
index e141c2b..f962067 100644
--- a/gitdb/test/db/test_git.py
+++ b/gitdb/test/db/test_git.py
@@ -3,7 +3,7 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.test.db.lib import (
- TestDBBase,
+ TestDBBase,
fixture_path,
with_rw_directory
)
@@ -12,6 +12,7 @@ from gitdb.db import GitDB
from gitdb.base import OStream, OInfo
from gitdb.util import hex_to_bin, bin_to_hex
+
class TestGitDB(TestDBBase):
def test_reading(self):
@@ -28,8 +29,7 @@ class TestGitDB(TestDBBase):
assert gdb.size() >= ni
sha_list = list(gdb.sha_iter())
assert len(sha_list) == gdb.size()
- sha_list = sha_list[:ni] # speed up tests ...
-
+ sha_list = sha_list[:ni] # speed up tests ...
# This is actually a test for compound functionality, but it doesn't
# have a separate test module
@@ -39,7 +39,7 @@ class TestGitDB(TestDBBase):
# mix even/uneven hexshas
for i, binsha in enumerate(sha_list):
- assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
+ assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
# END for each sha
self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
diff --git a/gitdb/test/db/test_loose.py b/gitdb/test/db/test_loose.py
index 1d6af9c..024c194 100644
--- a/gitdb/test/db/test_loose.py
+++ b/gitdb/test/db/test_loose.py
@@ -3,13 +3,14 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.test.db.lib import (
- TestDBBase,
+ TestDBBase,
with_rw_directory
)
from gitdb.db import LooseObjectDB
from gitdb.exc import BadObject
from gitdb.util import bin_to_hex
+
class TestLooseDB(TestDBBase):
@with_rw_directory
diff --git a/gitdb/test/db/test_mem.py b/gitdb/test/db/test_mem.py
index 97f7217..eb563c0 100644
--- a/gitdb/test/db/test_mem.py
+++ b/gitdb/test/db/test_mem.py
@@ -11,6 +11,7 @@ from gitdb.db import (
LooseObjectDB
)
+
class TestMemoryDB(TestDBBase):
@with_rw_directory
diff --git a/gitdb/test/db/test_pack.py b/gitdb/test/db/test_pack.py
index 963a71a..a901581 100644
--- a/gitdb/test/db/test_pack.py
+++ b/gitdb/test/db/test_pack.py
@@ -14,6 +14,7 @@ from gitdb.exc import BadObject, AmbiguousObjectName
import os
import random
+
class TestPackDB(TestDBBase):
@with_rw_directory
@@ -53,7 +54,6 @@ class TestPackDB(TestDBBase):
pdb.stream(sha)
# END for each sha to query
-
# test short finding - be a bit more brutal here
max_bytes = 19
min_bytes = 2
@@ -61,10 +61,10 @@ class TestPackDB(TestDBBase):
for i, sha in enumerate(sha_list):
short_sha = sha[:max((i % max_bytes), min_bytes)]
try:
- assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha
+ assert pdb.partial_to_complete_sha(short_sha, len(short_sha) * 2) == sha
except AmbiguousObjectName:
num_ambiguous += 1
- pass # valid, we can have short objects
+ pass # valid, we can have short objects
# END exception handling
# END for each sha to find
diff --git a/gitdb/test/db/test_ref.py b/gitdb/test/db/test_ref.py
index db93082..b774baf 100644
--- a/gitdb/test/db/test_ref.py
+++ b/gitdb/test/db/test_ref.py
@@ -3,8 +3,8 @@
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.test.db.lib import (
- TestDBBase,
- with_rw_directory,
+ TestDBBase,
+ with_rw_directory,
fixture_path
)
from gitdb.db import ReferenceDB
@@ -16,6 +16,7 @@ from gitdb.util import (
import os
+
class TestReferenceDB(TestDBBase):
def make_alt_file(self, alt_path, alt_list):
diff --git a/gitdb/test/lib.py b/gitdb/test/lib.py
index d09b1cb..c4acd92 100644
--- a/gitdb/test/lib.py
+++ b/gitdb/test/lib.py
@@ -24,6 +24,7 @@ from functools import wraps
#{ Bases
class TestBase(unittest.TestCase):
+
"""Base class for all tests"""
@@ -49,6 +50,7 @@ def skip_on_travis_ci(func):
def with_rw_directory(func):
"""Create a temporary directory which can be written to, remove it if the
test suceeds, but leave it otherwise to aid additional debugging"""
+
def wrapper(self):
path = tempfile.mktemp(prefix=func.__name__)
os.mkdir(path)
@@ -78,6 +80,7 @@ def with_rw_directory(func):
def with_packs_rw(func):
"""Function that provides a path into which the packs for testing should be
copied. Will pass on the path to the actual function afterwards"""
+
def wrapper(self, path):
src_pack_glob = fixture_path('packs/*')
copy_files_globbed(src_pack_glob, path, hard_link_ok=True)
@@ -91,12 +94,14 @@ def with_packs_rw(func):
#{ Routines
+
def fixture_path(relapath=''):
""":return: absolute path into the fixture directory
:param relapath: relative path into the fixtures directory, or ''
to obtain the fixture directory itself"""
return os.path.join(os.path.dirname(__file__), 'fixtures', relapath)
+
def copy_files_globbed(source_glob, target_dir, hard_link_ok=False):
"""Copy all files found according to the given source glob into the target directory
:param hard_link_ok: if True, hard links will be created if possible. Otherwise
@@ -127,11 +132,13 @@ def make_bytes(size_in_bytes, randomize=False):
a = array('i', producer)
return a.tostring()
+
def make_object(type, data):
""":return: bytes resembling an uncompressed object"""
odata = "blob %i\0" % len(data)
return odata.encode("ascii") + data
+
def make_memory_file(size_in_bytes, randomize=False):
""":return: tuple(size_of_stream, stream)
:param randomize: try to produce a very random stream"""
@@ -142,24 +149,27 @@ def make_memory_file(size_in_bytes, randomize=False):
#{ Stream Utilities
+
class DummyStream(object):
- def __init__(self):
- self.was_read = False
- self.bytes = 0
- self.closed = False
- def read(self, size):
- self.was_read = True
- self.bytes = size
+ def __init__(self):
+ self.was_read = False
+ self.bytes = 0
+ self.closed = False
- def close(self):
- self.closed = True
+ def read(self, size):
+ self.was_read = True
+ self.bytes = size
- def _assert(self):
- assert self.was_read
+ def close(self):
+ self.closed = True
+
+ def _assert(self):
+ assert self.was_read
class DeriveTest(OStream):
+
def __init__(self, sha, type, size, stream, *args, **kwargs):
self.myarg = kwargs.pop('myarg')
self.args = args
diff --git a/gitdb/test/performance/__init__.py b/gitdb/test/performance/__init__.py
index 8b13789..e69de29 100644
--- a/gitdb/test/performance/__init__.py
+++ b/gitdb/test/performance/__init__.py
@@ -1 +0,0 @@
-
diff --git a/gitdb/test/performance/lib.py b/gitdb/test/performance/lib.py
index ec45cf3..cbc52bc 100644
--- a/gitdb/test/performance/lib.py
+++ b/gitdb/test/performance/lib.py
@@ -13,22 +13,17 @@ k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE"
#} END invariants
-
-#{ Base Classes
+#{ Base Classes
class TestBigRepoR(TestBase):
+
"""TestCase providing access to readonly 'big' repositories using the following
member variables:
-
+
* gitrepopath
-
+
* read-only base path of the git source repository, i.e. .../git/.git"""
-
- #{ Invariants
- head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
- head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
- #} END invariants
-
+
def setUp(self):
try:
super(TestBigRepoR, self).setUp()
@@ -37,11 +32,12 @@ class TestBigRepoR(TestBase):
self.gitrepopath = os.environ.get(k_env_git_repo)
if not self.gitrepopath:
- logging.info("You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository")
+ logging.info(
+ "You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", k_env_git_repo)
ospd = os.path.dirname
self.gitrepopath = os.path.join(ospd(ospd(ospd(ospd(__file__)))), '.git')
# end assure gitrepo is set
assert self.gitrepopath.endswith('.git')
-
+
#} END base classes
diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py
index e460311..bdd2b0a 100644
--- a/gitdb/test/performance/test_pack.py
+++ b/gitdb/test/performance/test_pack.py
@@ -6,7 +6,7 @@
from __future__ import print_function
from gitdb.test.performance.lib import (
- TestBigRepoR
+ TestBigRepoR
)
from gitdb import (
@@ -24,19 +24,20 @@ import sys
import os
from time import time
+
class TestPackedDBPerformance(TestBigRepoR):
- @skip_on_travis_ci
+ @skip_on_travis_ci
def test_pack_random_access(self):
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
-
+
# sha lookup
st = time()
sha_list = list(pdb.sha_iter())
elapsed = time() - st
ns = len(sha_list)
print("PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed), file=sys.stderr)
-
+
# sha lookup: best-case and worst case access
pdb_pack_info = pdb._pack_info
# END shuffle shas
@@ -45,13 +46,14 @@ class TestPackedDBPerformance(TestBigRepoR):
pdb_pack_info(sha)
# END for each sha to look up
elapsed = time() - st
-
+
# discard cache
del(pdb._entities)
pdb.entities()
- print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr)
+ print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" %
+ (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr)
# END for each random mode
-
+
# query info and streams only
max_items = 10000 # can wait longer when testing memory
for pdb_fun in (pdb.info, pdb.stream):
@@ -59,9 +61,10 @@ class TestPackedDBPerformance(TestBigRepoR):
for sha in sha_list[:max_items]:
pdb_fun(sha)
elapsed = time() - st
- print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr)
+ print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" %
+ (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr)
# END for each function
-
+
# retrieve stream and read all
max_items = 5000
pdb_stream = pdb.stream
@@ -74,8 +77,9 @@ class TestPackedDBPerformance(TestBigRepoR):
total_size += stream.size
elapsed = time() - st
total_kib = total_size / 1000
- print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr)
-
+ print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" %
+ (max_items, total_kib, total_kib / elapsed, elapsed, max_items / elapsed), file=sys.stderr)
+
@skip_on_travis_ci
def test_loose_correctness(self):
"""based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
@@ -89,7 +93,7 @@ class TestPackedDBPerformance(TestBigRepoR):
mdb = MemoryDB()
for c, sha in enumerate(pdb.sha_iter()):
ostream = pdb.stream(sha)
- # the issue only showed on larger files which are hardly compressible ...
+ # the issue only showed on larger files which are hardly compressible ...
if ostream.type != str_blob_type:
continue
istream = IStream(ostream.type, ostream.size, ostream.stream)
@@ -101,7 +105,7 @@ class TestPackedDBPerformance(TestBigRepoR):
if c and c % 1000 == 0:
print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
mdb._cache.clear()
- # end for each sha to copy
+ # end for each sha to copy
@skip_on_travis_ci
def test_correctness(self):
@@ -124,6 +128,6 @@ class TestPackedDBPerformance(TestBigRepoR):
# END for each index
# END for each entity
elapsed = time() - st
- print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed), file=sys.stderr)
+ print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" %
+ (count, crc, elapsed, count / elapsed), file=sys.stderr)
# END for each verify mode
-
diff --git a/gitdb/test/performance/test_pack_streaming.py b/gitdb/test/performance/test_pack_streaming.py
index fe160ea..f805e59 100644
--- a/gitdb/test/performance/test_pack_streaming.py
+++ b/gitdb/test/performance/test_pack_streaming.py
@@ -6,7 +6,7 @@
from __future__ import print_function
from gitdb.test.performance.lib import (
- TestBigRepoR
+ TestBigRepoR
)
from gitdb.db.pack import PackedDB
@@ -18,27 +18,29 @@ import os
import sys
from time import time
+
class CountedNullStream(NullStream):
__slots__ = '_bw'
+
def __init__(self):
self._bw = 0
-
+
def bytes_written(self):
return self._bw
-
+
def write(self, d):
self._bw += NullStream.write(self, d)
-
+
class TestPackStreamingPerformance(TestBigRepoR):
-
+
@skip_on_travis_ci
def test_pack_writing(self):
# see how fast we can write a pack from object streams.
# This will not be fast, as we take time for decompressing the streams as well
ostream = CountedNullStream()
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
-
+
ni = 1000
count = 0
st = time()
@@ -47,22 +49,23 @@ class TestPackStreamingPerformance(TestBigRepoR):
pdb.stream(sha)
if count == ni:
break
- #END gather objects for pack-writing
+ # END gather objects for pack-writing
elapsed = time() - st
- print("PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed), file=sys.stderr)
-
+        print("PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )" %
+ (ni, elapsed, ni / elapsed), file=sys.stderr)
+
st = time()
PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
elapsed = time() - st
total_kb = ostream.bytes_written() / 1000
- print(sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed), sys.stderr)
-
-
+        print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" %
+              (total_kb, elapsed, total_kb / elapsed), file=sys.stderr)
+
@skip_on_travis_ci
def test_stream_reading(self):
# raise SkipTest()
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
-
+
# streaming only, meant for --with-profile runs
ni = 5000
count = 0
@@ -78,5 +81,5 @@ class TestPackStreamingPerformance(TestBigRepoR):
count += 1
elapsed = time() - st
total_kib = total_size / 1000
- print(sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed), sys.stderr)
-
+        print("PDB Streaming: Got %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" %
+              (ni, total_kib, total_kib / elapsed, elapsed, ni / elapsed), file=sys.stderr)
diff --git a/gitdb/test/performance/test_stream.py b/gitdb/test/performance/test_stream.py
index 84c9dea..bd66b26 100644
--- a/gitdb/test/performance/test_stream.py
+++ b/gitdb/test/performance/test_stream.py
@@ -35,22 +35,22 @@ def read_chunked_stream(stream):
# END read stream loop
assert total == stream.size
return stream
-
-
+
+
#} END utilities
class TestObjDBPerformance(TestBigRepoR):
-
- large_data_size_bytes = 1000*1000*50 # some MiB should do it
- moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
-
- @skip_on_travis_ci
+
+ large_data_size_bytes = 1000 * 1000 * 50 # some MiB should do it
+ moderate_data_size_bytes = 1000 * 1000 * 1 # just 1 MiB
+
+ @skip_on_travis_ci
@with_rw_directory
def test_large_data_streaming(self, path):
ldb = LooseObjectDB(path)
string_ios = list() # list of streams we previously created
-
- # serial mode
+
+ # serial mode
for randomize in range(2):
desc = (randomize and 'random ') or ''
print("Creating %s data ..." % desc, file=sys.stderr)
@@ -59,32 +59,32 @@ class TestObjDBPerformance(TestBigRepoR):
elapsed = time() - st
print("Done (in %f s)" % elapsed, file=sys.stderr)
string_ios.append(stream)
-
- # writing - due to the compression it will seem faster than it is
+
+ # writing - due to the compression it will seem faster than it is
st = time()
sha = ldb.store(IStream('blob', size, stream)).binsha
elapsed_add = time() - st
assert ldb.has_object(sha)
db_file = ldb.readable_db_object_path(bin_to_hex(sha))
fsize_kib = os.path.getsize(db_file) / 1000
-
-
+
size_kib = size / 1000
- print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add), file=sys.stderr)
-
+ print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" %
+ (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add), file=sys.stderr)
+
# reading all at once
st = time()
ostream = ldb.stream(sha)
shadata = ostream.read()
elapsed_readall = time() - st
-
+
stream.seek(0)
assert shadata == stream.getvalue()
- print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall), file=sys.stderr)
-
-
+ print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" %
+ (size_kib, desc, elapsed_readall, size_kib / elapsed_readall), file=sys.stderr)
+
# reading in chunks of 1 MiB
- cs = 512*1000
+ cs = 512 * 1000
chunks = list()
st = time()
ostream = ldb.stream(sha)
@@ -95,13 +95,14 @@ class TestObjDBPerformance(TestBigRepoR):
break
# END read in chunks
elapsed_readchunks = time() - st
-
+
stream.seek(0)
assert b''.join(chunks) == stream.getvalue()
-
+
cs_kib = cs / 1000
- print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)
-
+ print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" %
+ (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)
+
# del db file so we keep something to do
os.remove(db_file)
# END for each randomization factor
diff --git a/gitdb/test/test_base.py b/gitdb/test/test_base.py
index 578c29f..519cdfd 100644
--- a/gitdb/test/test_base.py
+++ b/gitdb/test/test_base.py
@@ -4,10 +4,10 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test for object db"""
from gitdb.test.lib import (
- TestBase,
- DummyStream,
- DeriveTest,
- )
+ TestBase,
+ DummyStream,
+ DeriveTest,
+)
from gitdb import (
OInfo,
@@ -20,11 +20,11 @@ from gitdb import (
)
from gitdb.util import (
NULL_BIN_SHA
- )
+)
from gitdb.typ import (
str_blob_type
- )
+)
class TestBaseTypes(TestBase):
@@ -54,7 +54,6 @@ class TestBaseTypes(TestBase):
assert dpinfo.delta_info == sha
assert dpinfo.pack_offset == 0
-
# test ostream
stream = DummyStream()
ostream = OStream(*(info + (stream, )))
@@ -80,7 +79,7 @@ class TestBaseTypes(TestBase):
assert stream.bytes == 5
# derive with own args
- DeriveTest(sha, str_blob_type, s, stream, 'mine',myarg = 3)._assert()
+ DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()
# test istream
istream = IStream(str_blob_type, s, stream)
diff --git a/gitdb/test/test_example.py b/gitdb/test/test_example.py
index aa43a09..ed0a885 100644
--- a/gitdb/test/test_example.py
+++ b/gitdb/test/test_example.py
@@ -4,7 +4,7 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module with examples from the tutorial section of the docs"""
from gitdb.test.lib import (
- TestBase,
+ TestBase,
fixture_path
)
from gitdb import IStream
@@ -12,6 +12,7 @@ from gitdb.db import LooseObjectDB
from io import BytesIO
+
class TestExamples(TestBase):
def test_base(self):
diff --git a/gitdb/test/test_pack.py b/gitdb/test/test_pack.py
index 3ab2fec..ff10572 100644
--- a/gitdb/test/test_pack.py
+++ b/gitdb/test/test_pack.py
@@ -43,6 +43,7 @@ def bin_sha_from_filename(filename):
return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:])
#} END utilities
+
class TestPack(TestBase):
packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67)
@@ -50,8 +51,8 @@ class TestPack(TestBase):
packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42)
packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2])
packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2])
- packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])
-
+ packfile_v2_3_ascii = (
+ fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])
def _assert_index_file(self, index, version, size):
assert index.packfile_checksum() != index.indexfile_checksum()
@@ -74,13 +75,12 @@ class TestPack(TestBase):
assert entry[2] == index.crc(oidx)
# verify partial sha
- for l in (4,8,11,17,20):
- assert index.partial_sha_to_index(sha[:l], l*2) == oidx
+ for l in (4, 8, 11, 17, 20):
+ assert index.partial_sha_to_index(sha[:l], l * 2) == oidx
# END for each object index in indexfile
self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2)
-
def _assert_pack_file(self, pack, version, size):
assert pack.version() == 2
assert pack.size() == size
@@ -120,7 +120,6 @@ class TestPack(TestBase):
dstream.seek(0)
assert dstream.read() == data
-
# read chunks
# NOTE: the current implementation is safe, it basically transfers
# all calls to the underlying memory map
@@ -128,7 +127,6 @@ class TestPack(TestBase):
# END for each object
assert num_obj == size
-
def test_pack_index(self):
# check version 1 and 2
for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2):
@@ -146,9 +144,9 @@ class TestPack(TestBase):
@with_rw_directory
def test_pack_entity(self, rw_dir):
pack_objs = list()
- for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1),
- (self.packfile_v2_2, self.packindexfile_v2),
- (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
+ for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1),
+ (self.packfile_v2_2, self.packindexfile_v2),
+ (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
packfile, version, size = packinfo
indexfile, version, size = indexinfo
entity = PackEntity(packfile)
@@ -193,22 +191,23 @@ class TestPack(TestBase):
pack_path = tempfile.mktemp('', "pack", rw_dir)
index_path = tempfile.mktemp('', 'index', rw_dir)
iteration = 0
+
def rewind_streams():
for obj in pack_objs:
obj.stream.seek(0)
- #END utility
- for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)):
+ # END utility
+ for ppath, ipath, num_obj in zip((pack_path, ) * 2, (index_path, None), (len(pack_objs), None)):
pfile = open(ppath, 'wb')
iwrite = None
if ipath:
ifile = open(ipath, 'wb')
iwrite = ifile.write
- #END handle ip
+ # END handle ip
# make sure we rewind the streams ... we work on the same objects over and over again
if iteration > 0:
rewind_streams()
- #END rewind streams
+ # END rewind streams
iteration += 1
pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
@@ -230,8 +229,8 @@ class TestPack(TestBase):
assert idx.packfile_checksum() == pack_sha
assert idx.indexfile_checksum() == index_sha
assert idx.size() == len(pack_objs)
- #END verify files exist
- #END for each packpath, indexpath pair
+ # END verify files exist
+ # END for each packpath, indexpath pair
         # verify the packs thoroughly
rewind_streams()
@@ -242,10 +241,9 @@ class TestPack(TestBase):
for use_crc in range(2):
assert entity.is_valid_stream(info.binsha, use_crc)
# END for each crc mode
- #END for each info
+ # END for each info
assert count == len(pack_objs)
-
def test_pack_64(self):
# TODO: hex-edit a pack helping us to verify that we can handle 64 byte offsets
# of course without really needing such a huge pack
diff --git a/gitdb/test/test_stream.py b/gitdb/test/test_stream.py
index 44a557d..9626825 100644
--- a/gitdb/test/test_stream.py
+++ b/gitdb/test/test_stream.py
@@ -31,10 +31,12 @@ import tempfile
import os
from io import BytesIO
+
class TestStream(TestBase):
+
"""Test stream classes"""
- data_sizes = (15, 10000, 1000*1024+512)
+ data_sizes = (15, 10000, 1000 * 1024 + 512)
def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None):
"""Make stream tests - the orig_stream is seekable, allowing it to be
@@ -43,13 +45,13 @@ class TestStream(TestBase):
:param rewind_stream: function called to rewind the stream to make it ready
for reuse"""
ns = 10
- assert len(cdata) > ns-1, "Data must be larger than %i, was %i" % (ns, len(cdata))
+ assert len(cdata) > ns - 1, "Data must be larger than %i, was %i" % (ns, len(cdata))
# read in small steps
ss = len(cdata) // ns
for i in range(ns):
data = stream.read(ss)
- chunk = cdata[i*ss:(i+1)*ss]
+ chunk = cdata[i * ss:(i + 1) * ss]
assert data == chunk
# END for each step
rest = stream.read()
@@ -136,7 +138,7 @@ class TestStream(TestBase):
self.failUnlessRaises(OSError, os.close, fd)
# read everything back, compare to data we zip
- fd = os.open(path, os.O_RDONLY|getattr(os, 'O_BINARY', 0))
+ fd = os.open(path, os.O_RDONLY | getattr(os, 'O_BINARY', 0))
written_data = os.read(fd, os.path.getsize(path))
assert len(written_data) == os.path.getsize(path)
os.close(fd)
@@ -156,7 +158,7 @@ class TestStream(TestBase):
data = ostream.read()
assert len(data) == ostream.size
- # Putting it back in should yield nothing new - after all, we have
+ # Putting it back in should yield nothing new - after all, we have
dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
assert dump.hexsha == sha
# end for each loose object sha to test
diff --git a/gitdb/test/test_util.py b/gitdb/test/test_util.py
index e79355a..1dee544 100644
--- a/gitdb/test/test_util.py
+++ b/gitdb/test/test_util.py
@@ -16,6 +16,7 @@ from gitdb.util import (
class TestUtils(TestBase):
+
def test_basics(self):
assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA
assert len(to_bin_sha(NULL_HEX_SHA)) == 20
@@ -73,7 +74,6 @@ class TestUtils(TestBase):
del(lfd)
assert not os.path.isfile(lockfilepath)
-
# write data - concurrently
lfd = LockedFD(my_file)
olfd = LockedFD(my_file)
diff --git a/gitdb/typ.py b/gitdb/typ.py
index bc7ba58..98d15f3 100644
--- a/gitdb/typ.py
+++ b/gitdb/typ.py
@@ -4,7 +4,7 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing information about types known to the database"""
-str_blob_type = b'blob'
+str_blob_type = b'blob'
str_commit_type = b'commit'
-str_tree_type = b'tree'
-str_tag_type = b'tag'
+str_tree_type = b'tree'
+str_tag_type = b'tag'
diff --git a/gitdb/util.py b/gitdb/util.py
index 93ba7f0..5b451fa 100644
--- a/gitdb/util.py
+++ b/gitdb/util.py
@@ -11,10 +11,10 @@ import errno
from io import StringIO
from smmap import (
- StaticWindowMapManager,
- SlidingWindowMapManager,
- SlidingWindowMapBuffer
- )
+ StaticWindowMapManager,
+ SlidingWindowMapManager,
+ SlidingWindowMapBuffer
+)
# initialize our global memory manager instance
# Use it to free cached (and unused) resources.
@@ -22,7 +22,7 @@ if sys.version_info[1] < 6:
mman = StaticWindowMapManager()
else:
mman = SlidingWindowMapManager()
-#END handle mman
+# END handle mman
import hashlib
@@ -31,6 +31,7 @@ try:
except ImportError:
from struct import unpack, calcsize
__calcsize_cache = dict()
+
def unpack_from(fmt, data, offset=0):
try:
size = __calcsize_cache[fmt]
@@ -38,7 +39,7 @@ except ImportError:
size = calcsize(fmt)
__calcsize_cache[fmt] = size
# END exception handling
- return unpack(fmt, data[offset : offset + size])
+ return unpack(fmt, data[offset: offset + size])
# END own unpack_from implementation
@@ -67,8 +68,8 @@ close = os.close
fsync = os.fsync
# Backwards compatibility imports
-from gitdb.const import (
- NULL_BIN_SHA,
+from gitdb.const import (
+ NULL_BIN_SHA,
NULL_HEX_SHA
)
@@ -76,7 +77,9 @@ from gitdb.const import (
#{ compatibility stuff ...
+
class _RandomAccessStringIO(object):
+
"""Wrapper to provide required functionality in case memory maps cannot or may
not be used. This is only really required in python 2.4"""
__slots__ = '_sio'
@@ -96,6 +99,7 @@ class _RandomAccessStringIO(object):
def __getslice__(self, start, end):
return self.getvalue()[start:end]
+
def byte_ord(b):
"""
Return the integer representation of the byte string. This supports Python
@@ -110,6 +114,7 @@ def byte_ord(b):
#{ Routines
+
def make_sha(source=''.encode("ascii")):
"""A python2.4 workaround for the sha/hashlib module fiasco
@@ -121,6 +126,7 @@ def make_sha(source=''.encode("ascii")):
sha1 = sha.sha(source)
return sha1
+
def allocate_memory(size):
""":return: a file-protocol accessible memory block of the given size"""
if size == 0:
@@ -134,7 +140,7 @@ def allocate_memory(size):
# this of course may fail if the amount of memory is not available in
# one chunk - would only be the case in python 2.4, being more likely on
# 32 bit systems.
- return _RandomAccessStringIO("\0"*size)
+ return _RandomAccessStringIO("\0" * size)
# END handle memory allocation
@@ -166,6 +172,7 @@ def file_contents_ro(fd, stream=False, allow_mmap=True):
return _RandomAccessStringIO(contents)
return contents
+
def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
"""Get the file contents at filepath as fast as possible
@@ -178,25 +185,28 @@ def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
**Note** for now we don't try to use O_NOATIME directly as the right value needs to be
shared per database in fact. It only makes a real difference for loose object
databases anyway, and they use it with the help of the ``flags`` parameter"""
- fd = os.open(filepath, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags)
+ fd = os.open(filepath, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags)
try:
return file_contents_ro(fd, stream, allow_mmap)
finally:
close(fd)
# END assure file is closed
+
def sliding_ro_buffer(filepath, flags=0):
"""
:return: a buffer compatible object which uses our mapped memory manager internally
ready to read the whole given filepath"""
return SlidingWindowMapBuffer(mman.make_cursor(filepath), flags=flags)
+
def to_hex_sha(sha):
""":return: hexified version of sha"""
if len(sha) == 40:
return sha
return bin_to_hex(sha)
+
def to_bin_sha(sha):
if len(sha) == 20:
return sha
@@ -209,6 +219,7 @@ def to_bin_sha(sha):
#{ Utilities
class LazyMixin(object):
+
"""
Base class providing an interface to lazily retrieve attribute values upon
first access. If slots are used, memory will only be reserved once the attribute
@@ -240,6 +251,7 @@ class LazyMixin(object):
class LockedFD(object):
+
"""
This class facilitates a safe read and write operation to a file on disk.
If we write to 'file', we obtain a lock file at 'file.lock' and write to
@@ -290,7 +302,7 @@ class LockedFD(object):
# try to open the lock file
binary = getattr(os, 'O_BINARY', 0)
- lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
+ lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
try:
fd = os.open(self._lockfilepath(), lockmode, int("600", 8))
if not write:
diff --git a/gitdb/utils/compat.py b/gitdb/utils/compat.py
index a2640fd..c08cab5 100644
--- a/gitdb/utils/compat.py
+++ b/gitdb/utils/compat.py
@@ -24,7 +24,7 @@ except NameError:
return obj[offset:]
else:
# return memoryview(obj)[offset:offset+size]
- return obj[offset:offset+size]
+ return obj[offset:offset + size]
# end buffer reimplementation
memoryview = memoryview
diff --git a/gitdb/utils/encoding.py b/gitdb/utils/encoding.py
index 2d03ad3..5855062 100644
--- a/gitdb/utils/encoding.py
+++ b/gitdb/utils/encoding.py
@@ -7,6 +7,7 @@ else:
string_types = (basestring, )
text_type = unicode
+
def force_bytes(data, encoding="ascii"):
if isinstance(data, bytes):
return data
@@ -16,6 +17,7 @@ def force_bytes(data, encoding="ascii"):
return data
+
def force_text(data, encoding="utf-8"):
if isinstance(data, text_type):
return data
diff --git a/setup.py b/setup.py
index 4f8d1d5..6c67dc6 100755
--- a/setup.py
+++ b/setup.py
@@ -1,70 +1,74 @@
#!/usr/bin/env python
-from distutils.core import setup, Extension
+from distutils.core import setup, Extension
from distutils.command.build_py import build_py
from distutils.command.build_ext import build_ext
-import os, sys
+import os
+import sys
-# wow, this is a mixed bag ... I am pretty upset about all of this ...
+# wow, this is a mixed bag ... I am pretty upset about all of this ...
setuptools_build_py_module = None
try:
- # don't pull it in if we don't have to
- if 'setuptools' in sys.modules:
- import setuptools.command.build_py as setuptools_build_py_module
- from setuptools.command.build_ext import build_ext
+ # don't pull it in if we don't have to
+ if 'setuptools' in sys.modules:
+ import setuptools.command.build_py as setuptools_build_py_module
+ from setuptools.command.build_ext import build_ext
except ImportError:
- pass
+ pass
+
class build_ext_nofail(build_ext):
- """Doesn't fail when build our optional extensions"""
- def run(self):
- try:
- build_ext.run(self)
- except Exception:
- print("Ignored failure when building extensions, pure python modules will be used instead")
- # END ignore errors
-
+
+    """Doesn't fail when building our optional extensions"""
+
+ def run(self):
+ try:
+ build_ext.run(self)
+ except Exception:
+ print("Ignored failure when building extensions, pure python modules will be used instead")
+ # END ignore errors
+
def get_data_files(self):
- """Can you feel the pain ? So, in python2.5 and python2.4 coming with maya,
- the line dealing with the ``plen`` has a bug which causes it to truncate too much.
- It is fixed in the system interpreters as they receive patches, and shows how
- bad it is if something doesn't have proper unittests.
- The code here is a plain copy of the python2.6 version which works for all.
-
- Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
- data = []
- if not self.packages:
- return data
-
- # this one is just for the setup tools ! They don't iniitlialize this variable
- # when they should, but do it on demand using this method.Its crazy
- if hasattr(self, 'analyze_manifest'):
- self.analyze_manifest()
- # END handle setuptools ...
-
- for package in self.packages:
- # Locate package source directory
- src_dir = self.get_package_dir(package)
-
- # Compute package build directory
- build_dir = os.path.join(*([self.build_lib] + package.split('.')))
-
- # Length of path to strip from found files
- plen = 0
- if src_dir:
- plen = len(src_dir)+1
-
- # Strip directory from globbed filenames
- filenames = [
- file[plen:] for file in self.find_data_files(package, src_dir)
- ]
- data.append((package, src_dir, build_dir, filenames))
- return data
-
+ """Can you feel the pain ? So, in python2.5 and python2.4 coming with maya,
+ the line dealing with the ``plen`` has a bug which causes it to truncate too much.
+ It is fixed in the system interpreters as they receive patches, and shows how
+ bad it is if something doesn't have proper unittests.
+ The code here is a plain copy of the python2.6 version which works for all.
+
+ Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
+ data = []
+ if not self.packages:
+ return data
+
+        # this one is just for the setuptools! They don't initialize this variable
+        # when they should, but do it on demand using this method. It's crazy
+ if hasattr(self, 'analyze_manifest'):
+ self.analyze_manifest()
+ # END handle setuptools ...
+
+ for package in self.packages:
+ # Locate package source directory
+ src_dir = self.get_package_dir(package)
+
+ # Compute package build directory
+ build_dir = os.path.join(*([self.build_lib] + package.split('.')))
+
+ # Length of path to strip from found files
+ plen = 0
+ if src_dir:
+ plen = len(src_dir) + 1
+
+ # Strip directory from globbed filenames
+ filenames = [
+ file[plen:] for file in self.find_data_files(package, src_dir)
+ ]
+ data.append((package, src_dir, build_dir, filenames))
+ return data
+
build_py.get_data_files = get_data_files
if setuptools_build_py_module:
- setuptools_build_py_module.build_py._get_data_files = get_data_files
+ setuptools_build_py_module.build_py._get_data_files = get_data_files
# END apply setuptools patch too
# NOTE: This is currently duplicated from the gitdb.__init__ module, as we cannot
@@ -76,15 +80,15 @@ __homepage__ = "https://github.com/gitpython-developers/gitdb"
version_info = (0, 6, 1)
__version__ = '.'.join(str(i) for i in version_info)
-setup(cmdclass={'build_ext':build_ext_nofail},
- name = "gitdb",
- version = __version__,
- description = "Git Object Database",
- author = __author__,
- author_email = __contact__,
- url = __homepage__,
- packages = ('gitdb', 'gitdb.db', 'gitdb.utils', 'gitdb.test'),
- package_dir = {'gitdb':'gitdb'},
+setup(cmdclass={'build_ext': build_ext_nofail},
+ name="gitdb",
+ version=__version__,
+ description="Git Object Database",
+ author=__author__,
+ author_email=__contact__,
+ url=__homepage__,
+ packages=('gitdb', 'gitdb.db', 'gitdb.utils', 'gitdb.test'),
+      package_dir={'gitdb': 'gitdb'},
ext_modules=[Extension('gitdb._perf', ['gitdb/_fun.c', 'gitdb/_delta_apply.c'], include_dirs=['gitdb'])],
license = "BSD License",
zip_safe=False,
@@ -94,26 +98,26 @@ setup(cmdclass={'build_ext':build_ext_nofail},
# See https://pypi.python.org/pypi?%3Aaction=list_classifiers
classifiers=[
# Picked from
- # http://pypi.python.org/pypi?:action=list_classifiers
- #"Development Status :: 1 - Planning",
- #"Development Status :: 2 - Pre-Alpha",
- #"Development Status :: 3 - Alpha",
- # "Development Status :: 4 - Beta",
- "Development Status :: 5 - Production/Stable",
- #"Development Status :: 6 - Mature",
- #"Development Status :: 7 - Inactive",
- "Environment :: Console",
- "Intended Audience :: Developers",
- "License :: OSI Approved :: BSD License",
- "Operating System :: OS Independent",
- "Operating System :: POSIX",
- "Operating System :: Microsoft :: Windows",
- "Operating System :: MacOS :: MacOS X",
- "Programming Language :: Python",
- "Programming Language :: Python :: 2",
- "Programming Language :: Python :: 2.6",
- "Programming Language :: Python :: 2.7",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.3",
- "Programming Language :: Python :: 3.4",
- ],)
+ # http://pypi.python.org/pypi?:action=list_classifiers
+ #"Development Status :: 1 - Planning",
+ #"Development Status :: 2 - Pre-Alpha",
+ #"Development Status :: 3 - Alpha",
+ # "Development Status :: 4 - Beta",
+ "Development Status :: 5 - Production/Stable",
+ #"Development Status :: 6 - Mature",
+ #"Development Status :: 7 - Inactive",
+ "Environment :: Console",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: BSD License",
+ "Operating System :: OS Independent",
+ "Operating System :: POSIX",
+ "Operating System :: Microsoft :: Windows",
+ "Operating System :: MacOS :: MacOS X",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 2",
+ "Programming Language :: Python :: 2.6",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.3",
+ "Programming Language :: Python :: 3.4",
+],)