diff options
| author | Sebastian Thiel <byronimo@gmail.com> | 2010-10-13 15:51:04 +0200 |
|---|---|---|
| committer | Sebastian Thiel <byronimo@gmail.com> | 2010-10-13 15:51:48 +0200 |
| commit | fa03f746746df99763deb45943988d68fb450b4b (patch) | |
| tree | a5ad9e1e133473ac53424014148d8a06e36e7e35 | |
| parent | 9c5672e1b0cf56f1cf5151d60acb35d610e904a8 (diff) | |
| download | gitdb-fa03f746746df99763deb45943988d68fb450b4b.tar.gz | |
Reverse Delta Application was a nice experiment, but it has one major flaw: currently it integrates chunks from its base into the topmost delta chunk list, which causes plenty of mem-move operations. Plenty means many, many, many — and it's getting worse the more deltas you have, of course. The algorithm was supposed to reduce the amount of memory activity, but it failed at this point, making things worse than before. It would probably be fastest to just implement the previous Python algorithm, which swaps two buffers, in C.
| -rw-r--r-- | _delta_apply.c | 11 | ||||
| -rw-r--r-- | stream.py | 1 |
2 files changed, 8 insertions, 4 deletions
diff --git a/_delta_apply.c b/_delta_apply.c index d81e65d..e667a2e 100644 --- a/_delta_apply.c +++ b/_delta_apply.c @@ -398,7 +398,6 @@ int DCV_dbg_check_integrity(const DeltaChunkVector* vec) inline void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, ull size) { - //fprintf(stderr, "Copy Slice To: src->size = %i, ofs = %i, size=%i\n", (int)src->size, (int)ofs, (int)size); assert(DCV_lbound(src) <= ofs); assert((ofs + size) <= DCV_rbound(src)); @@ -443,7 +442,6 @@ void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull inline void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to, DeltaChunk* at) { - //fprintf(stderr, "Replace one by many: from->size = %i, to->size = %i, to->reserved = %i\n", (int)from->size, (int)to->size, (int)to->reserved_size); assert(from->size > 1); assert(to->size + from->size - 1 <= to->reserved_size); @@ -452,7 +450,6 @@ void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to, // If we are somewhere in the middle, we have to make some space if (DCV_last(to) != at) { - //fprintf(stderr, "moving to %i from %i, num chunks = %i\n", (int)((at+from->size)-to->mem), (int)((at+1)-to->mem), (int)(DCV_end(to) - (at+1))); memmove((void*)(at+from->size), (void*)(at+1), (size_t)((DCV_end(to) - (at+1)) * sizeof(DeltaChunk))); } @@ -725,7 +722,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams) const ull target_size = msb_size(&data, dend); // estimate number of ops - assume one third adds, half two byte (size+offset) copies - const uint approx_num_cmds = (dlen / 3) + (((dlen / 3) * 2) / (2+2+1)); + // Assume good compression for the adds + const uint approx_num_cmds = ((dlen / 3) / 10) + (((dlen / 3) * 2) / (2+2+1)); DCV_reserve_memory(&dcv, approx_num_cmds); // parse command stream @@ -825,6 +823,11 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams) DCV_connect_with_base(&tdcv, &dcv, &tmpl); } + #ifdef 
DEBUG + fprintf(stderr, "tdcv->size = %i, tdcv->reserved_size = %i\n", (int)tdcv.size, (int)tdcv.reserved_size); + fprintf(stderr, "dcv->size = %i, dcv->reserved_size = %i\n", (int)dcv.size, (int)dcv.reserved_size); + #endif + if (is_first_run){ tdcv = dcv; // wipe out dcv without destroying the members, get its own memory @@ -337,6 +337,7 @@ class DeltaApplyReader(LazyMixin): # Aggregate all deltas into one delta in reverse order. Hence we take # the last delta, and reverse-merge its ancestor delta, until we receive # the final delta data stream. + # print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams]) dcl = connect_deltas(self._dstreams) # call len directly, as the (optional) c version doesn't implement the sequence |
