summaryrefslogtreecommitdiff
path: root/git/test/performance/test_pack_streaming.py
blob: 795ed1e26ed66edbb741f16c6d04b60b0fcf44d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Specific test for pack streams only"""
from lib import (
	TestBigRepoR 
	)

from gitdb.db.pack import PackedDB
from gitdb.stream import NullStream
from gitdb.pack import PackEntity

import os
import sys
from time import time
from nose import SkipTest

class CountedNullStream(NullStream):
	"""A NullStream which additionally keeps track of how much was written to it.

	The data itself is handed to ``NullStream.write``; only the running total
	of the values returned by that call is kept."""
	__slots__ = ('_bw',)

	def __init__(self):
		# running total of everything reported as written so far
		self._bw = 0

	def bytes_written(self):
		""":return: total accumulated over all calls to ``write`` so far"""
		return self._bw

	def write(self, d):
		"""Pass ``d`` on to the base class and add the result to the total.

		:param d: data to write
		:return: whatever ``NullStream.write`` returned for ``d``, so this
			class honours the same write contract as its base instead of
			silently returning None"""
		written = NullStream.write(self, d)
		self._bw += written
		return written
	

class TestPackStreamingPerformance(TestBigRepoR):
	"""Timing runs for reading object streams from packs and writing them back.

	Nothing is asserted about the timings, the results are only reported on
	stderr. The pack directory is located through ``self.gitrepopath``, which
	is presumably provided by ``TestBigRepoR`` - it is not set in this class."""

	@staticmethod
	def _per_second(amount, elapsed):
		"""Return ``amount`` per second, given the ``elapsed`` time in seconds.

		:return: float rate, or 0.0 if no time was measured. A coarse clock may
			report a delta of exactly zero for a short run, which must not abort
			the test with a ZeroDivisionError"""
		if elapsed <= 0:
			return 0.0
		return amount / float(elapsed)

	def test_pack_writing(self):
		"""Time how long it takes to obtain up to 5000 object streams by sha, and
		how long it takes to write a pack from them into a counting null stream."""
		# see how fast we can write a pack from object streams.
		# This will not be fast, as we take time for decompressing the streams as well
		ostream = CountedNullStream()
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

		ni = 5000
		st = time()
		objs = list()
		for sha in pdb.sha_iter():
			objs.append(pdb.stream(sha))
			if len(objs) == ni:
				break
		#END gather objects for pack-writing
		elapsed = time() - st
		# report what was actually gathered - the database may hold less than ni objects
		count = len(objs)
		sys.stderr.write("PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )\n" % (count, elapsed, self._per_second(count, elapsed)))

		st = time()
		PackEntity.write_pack(objs, ostream.write)
		elapsed = time() - st
		# float division, to not truncate the size before the rate is computed
		total_kb = ostream.bytes_written() / 1000.0
		sys.stderr.write("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)\n" % (total_kb, elapsed, self._per_second(total_kb, elapsed)))

	def test_stream_reading(self):
		"""Time reading the full contents of up to 5000 object streams.

		Currently always skipped - everything after the raise only runs once
		the skip is removed by hand.

		:raise SkipTest: unconditionally"""
		raise SkipTest()
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

		# streaming only, meant for --with-profile runs
		ni = 5000
		count = 0
		# bound method is looked up once, outside of the timed loop
		pdb_stream = pdb.stream
		total_size = 0
		st = time()
		for sha in pdb.sha_iter():
			if count == ni:
				break
			stream = pdb_stream(sha)
			stream.read()
			total_size += stream.size
			count += 1
		elapsed = time() - st
		# the report is labelled KiB, hence 1024 bytes per unit
		total_kib = total_size / 1024.0
		sys.stderr.write("PDB Streaming: Got %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )\n" % (count, total_kib, self._per_second(total_kib, elapsed), elapsed, self._per_second(count, elapsed)))