summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2010-10-13 15:51:04 +0200
committerSebastian Thiel <byronimo@gmail.com>2010-10-13 15:51:48 +0200
commitfa03f746746df99763deb45943988d68fb450b4b (patch)
treea5ad9e1e133473ac53424014148d8a06e36e7e35
parent9c5672e1b0cf56f1cf5151d60acb35d610e904a8 (diff)
downloadgitdb-fa03f746746df99763deb45943988d68fb450b4b.tar.gz
Reverse Delta Application was a nice experiment, but it has one major flaw: currently it integrates chunks from its base into the topmost delta chunk list, which causes plenty of mem-move operations. Plenty means many, many, many, and it's getting worse the more deltas you have, of course. The algorithm was supposed to reduce the amount of memory activity, but failed at this point, making it worse than before. Probably it would be fastest to just implement the previous Python algorithm, which swaps two buffers, in C.
-rw-r--r--_delta_apply.c11
-rw-r--r--stream.py1
2 files changed, 8 insertions, 4 deletions
diff --git a/_delta_apply.c b/_delta_apply.c
index d81e65d..e667a2e 100644
--- a/_delta_apply.c
+++ b/_delta_apply.c
@@ -398,7 +398,6 @@ int DCV_dbg_check_integrity(const DeltaChunkVector* vec)
inline
void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, ull size)
{
- //fprintf(stderr, "Copy Slice To: src->size = %i, ofs = %i, size=%i\n", (int)src->size, (int)ofs, (int)size);
assert(DCV_lbound(src) <= ofs);
assert((ofs + size) <= DCV_rbound(src));
@@ -443,7 +442,6 @@ void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull
inline
void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to, DeltaChunk* at)
{
- //fprintf(stderr, "Replace one by many: from->size = %i, to->size = %i, to->reserved = %i\n", (int)from->size, (int)to->size, (int)to->reserved_size);
assert(from->size > 1);
assert(to->size + from->size - 1 <= to->reserved_size);
@@ -452,7 +450,6 @@ void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to,
// If we are somewhere in the middle, we have to make some space
if (DCV_last(to) != at) {
- //fprintf(stderr, "moving to %i from %i, num chunks = %i\n", (int)((at+from->size)-to->mem), (int)((at+1)-to->mem), (int)(DCV_end(to) - (at+1)));
memmove((void*)(at+from->size), (void*)(at+1), (size_t)((DCV_end(to) - (at+1)) * sizeof(DeltaChunk)));
}
@@ -725,7 +722,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
const ull target_size = msb_size(&data, dend);
// estimate number of ops - assume one third adds, half two byte (size+offset) copies
- const uint approx_num_cmds = (dlen / 3) + (((dlen / 3) * 2) / (2+2+1));
+ // Assume good compression for the adds
+ const uint approx_num_cmds = ((dlen / 3) / 10) + (((dlen / 3) * 2) / (2+2+1));
DCV_reserve_memory(&dcv, approx_num_cmds);
// parse command stream
@@ -825,6 +823,11 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
DCV_connect_with_base(&tdcv, &dcv, &tmpl);
}
+ #ifdef DEBUG
+ fprintf(stderr, "tdcv->size = %i, tdcv->reserved_size = %i\n", (int)tdcv.size, (int)tdcv.reserved_size);
+ fprintf(stderr, "dcv->size = %i, dcv->reserved_size = %i\n", (int)dcv.size, (int)dcv.reserved_size);
+ #endif
+
if (is_first_run){
tdcv = dcv;
// wipe out dcv without destroying the members, get its own memory
diff --git a/stream.py b/stream.py
index af4591f..8f9f9c3 100644
--- a/stream.py
+++ b/stream.py
@@ -337,6 +337,7 @@ class DeltaApplyReader(LazyMixin):
# Aggregate all deltas into one delta in reverse order. Hence we take
# the last delta, and reverse-merge its ancestor delta, until we receive
# the final delta data stream.
+ # print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams])
dcl = connect_deltas(self._dstreams)
# call len directly, as the (optional) c version doesn't implement the sequence