summaryrefslogtreecommitdiff
path: root/git/test/performance/db/packedodb_impl.py
blob: b95a8d13f157e4ab133fe8723a997c11cec6e0ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Performance tests for object store"""
from git.test.performance.lib import (
	TestBigRepoR, 
	GlobalsItemDeletorMetaCls
	)

from git.exc import UnsupportedOperation

import sys
import os
from time import time
import random


class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
	# Names the abstract base class defined below. The inherited metaclass
	# presumably uses this to remove that base from the module's globals so
	# the test runner only collects concrete subclasses, not the incomplete
	# base itself — confirm against GlobalsItemDeletorMetaCls.
	ModuleToDelete = 'TestPurePackedODBPerformanceBase'

class TestPurePackedODBPerformanceBase(TestBigRepoR):
	"""Abstract performance-test base for packed object databases.
	
	Subclasses must set ``PackedODBCls`` to the database implementation to
	benchmark; ``setUpAll`` then opens that database on the read-only big
	repository's pack directory. All tests print their timings to stderr
	and make no correctness assertions except in :meth:`test_correctness`.
	"""
	# Registers the deletor metaclass so this base is dropped from the
	# module namespace after subclasses are created (Python 2 syntax).
	__metaclass__ = PerfBaseDeletorMetaClass
	
	#{ Configuration
	# Database type under test; must be overridden by concrete subclasses.
	PackedODBCls = None
	#} END configuration
	
	@classmethod
	def setUpAll(cls):
		"""Open the configured packed database on the read-only repository.
		
		:raise AssertionError: if a subclass failed to set ``PackedODBCls``"""
		super(TestPurePackedODBPerformanceBase, cls).setUpAll()
		if cls.PackedODBCls is None:
			raise AssertionError("PackedODBCls must be set in subclass")
		#END assert configuration
		# rorepo and db_path come from the TestBigRepoR fixture — the
		# database is opened on its "pack" directory and shared by all tests.
		cls.ropdb = cls.PackedODBCls(cls.rorepo.db_path("pack"))
	
	def test_pack_random_access(self):
		"""Benchmark sha iteration, pack-info lookup, object info/stream
		queries and full stream reads; results go to stderr only."""
		pdb = self.ropdb
		
		# sha lookup: time a full iteration over every sha in the database
		st = time()
		sha_list = list(pdb.sha_iter())
		elapsed = time() - st
		ns = len(sha_list)
		# NOTE(review): ns / elapsed raises ZeroDivisionError if the run is
		# faster than the clock resolution — acceptable for a perf test.
		print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
		
		# sha lookup: best-case and worst case access
		# Hoist the bound method so the loop below measures lookup cost,
		# not attribute access.
		pdb_pack_info = pdb._pack_info
		# END shuffle shas
		# NOTE(review): the comment above looks like a leftover from removed
		# sha-shuffling code; shas are looked up in iteration order here.
		st = time()
		for sha in sha_list:
			pdb_pack_info(sha)
		# END for each sha to look up
		elapsed = time() - st
		
		# discard cache
		# Drop the memoized entity list and rebuild it so the pack count
		# printed below is fresh; note elapsed was measured before this,
		# so the cache rebuild is deliberately not part of the timing.
		del(pdb._entities)
		pdb.entities()
		print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
		# END for each random mode
		
		# query info and streams only
		max_items = 10000			# can wait longer when testing memory
		# Time both lightweight query entry points over the same sha prefix.
		for pdb_fun in (pdb.info, pdb.stream):
			st = time()
			for sha in sha_list[:max_items]:
				pdb_fun(sha)
			elapsed = time() - st
			print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
		# END for each function
		
		# retrieve stream and read all
		# Heavier pass: fully decompress each stream and total the bytes read.
		max_items = 5000
		pdb_stream = pdb.stream
		total_size = 0
		st = time()
		for sha in sha_list[:max_items]:
			stream = pdb_stream(sha)
			stream.read()
			total_size += stream.size
		elapsed = time() - st
		total_kib = total_size / 1000
		print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
		
	def test_correctness(self):
		"""Verify every object stream in every pack entity, once without and
		once with crc checking; counts and rates are printed to stderr."""
		pdb = self.ropdb
		# disabled for now as it used to work perfectly, checking big repositories takes a long time
		# NOTE(review): despite the comment above, this test is NOT disabled —
		# it runs as written; confirm whether it should be skipped.
		print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
		# crc is 0 then 1, passed as a false/true flag to use_crc below.
		for crc in range(2):
			count = 0
			st = time()
			for entity in pdb.entities():
				# Hoist per-entity lookups out of the inner index loop.
				pack_verify = entity.is_valid_stream
				sha_by_index = entity.index().sha
				for index in xrange(entity.index().size()):
					try:
						assert pack_verify(sha_by_index(index), use_crc=crc)
						count += 1
					except UnsupportedOperation:
						pass
					# END ignore old indices
				# END for each index
			# END for each entity
			elapsed = time() - st
			print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
		# END for each verify mode