summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2010-10-13 15:51:04 +0200
committerSebastian Thiel <byronimo@gmail.com>2010-10-13 15:51:48 +0200
commitfa03f746746df99763deb45943988d68fb450b4b (patch)
treea5ad9e1e133473ac53424014148d8a06e36e7e35
parent9c5672e1b0cf56f1cf5151d60acb35d610e904a8 (diff)
downloadgitdb-fa03f746746df99763deb45943988d68fb450b4b.tar.gz
Reverse Delta Application was a nice experiment, but it has one major flaw: currently it integrates chunks from its base into the topmost delta chunk list, which causes plenty of mem-move operations. Plenty means many, many, many, and it's getting worse the more deltas you have, of course. The algorithm was supposed to reduce the amount of memory activity, but failed at this point, making it worse than before. Probably it would be fastest to just implement the previous Python algorithm, which swaps two buffers, in C.
-rw-r--r--_delta_apply.c11
-rw-r--r--stream.py1
2 files changed, 8 insertions, 4 deletions
diff --git a/_delta_apply.c b/_delta_apply.c
index d81e65d..e667a2e 100644
--- a/_delta_apply.c
+++ b/_delta_apply.c
@@ -398,7 +398,6 @@ int DCV_dbg_check_integrity(const DeltaChunkVector* vec)
inline
void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, ull size)
{
- //fprintf(stderr, "Copy Slice To: src->size = %i, ofs = %i, size=%i\n", (int)src->size, (int)ofs, (int)size);
assert(DCV_lbound(src) <= ofs);
assert((ofs + size) <= DCV_rbound(src));
@@ -443,7 +442,6 @@ void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull
inline
void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to, DeltaChunk* at)
{
- //fprintf(stderr, "Replace one by many: from->size = %i, to->size = %i, to->reserved = %i\n", (int)from->size, (int)to->size, (int)to->reserved_size);
assert(from->size > 1);
assert(to->size + from->size - 1 <= to->reserved_size);
@@ -452,7 +450,6 @@ void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to,
// If we are somewhere in the middle, we have to make some space
if (DCV_last(to) != at) {
- //fprintf(stderr, "moving to %i from %i, num chunks = %i\n", (int)((at+from->size)-to->mem), (int)((at+1)-to->mem), (int)(DCV_end(to) - (at+1)));
memmove((void*)(at+from->size), (void*)(at+1), (size_t)((DCV_end(to) - (at+1)) * sizeof(DeltaChunk)));
}
@@ -725,7 +722,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
const ull target_size = msb_size(&data, dend);
// estimate number of ops - assume one third adds, half two byte (size+offset) copies
- const uint approx_num_cmds = (dlen / 3) + (((dlen / 3) * 2) / (2+2+1));
+ // Assume good compression for the adds
+ const uint approx_num_cmds = ((dlen / 3) / 10) + (((dlen / 3) * 2) / (2+2+1));
DCV_reserve_memory(&dcv, approx_num_cmds);
// parse command stream
@@ -825,6 +823,11 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
DCV_connect_with_base(&tdcv, &dcv, &tmpl);
}
+ #ifdef DEBUG
+ fprintf(stderr, "tdcv->size = %i, tdcv->reserved_size = %i\n", (int)tdcv.size, (int)tdcv.reserved_size);
+ fprintf(stderr, "dcv->size = %i, dcv->reserved_size = %i\n", (int)dcv.size, (int)dcv.reserved_size);
+ #endif
+
if (is_first_run){
tdcv = dcv;
// wipe out dcv without destroying the members, get its own memory
diff --git a/stream.py b/stream.py
index af4591f..8f9f9c3 100644
--- a/stream.py
+++ b/stream.py
@@ -337,6 +337,7 @@ class DeltaApplyReader(LazyMixin):
# Aggregate all deltas into one delta in reverse order. Hence we take
# the last delta, and reverse-merge its ancestor delta, until we receive
# the final delta data stream.
+ # print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams])
dcl = connect_deltas(self._dstreams)
# call len directly, as the (optional) c version doesn't implement the sequence