summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2010-10-12 18:17:19 +0200
committerSebastian Thiel <byronimo@gmail.com>2010-10-12 18:17:19 +0200
commit166e538f9aab8db7ab30b6e8b3be407200a7e3c1 (patch)
treeb25d506c585515ecafce8f4cca9c3cd138434cab
parenta93363cffb225520869d737de1081c1ff77ed108 (diff)
downloadgitdb-166e538f9aab8db7ab30b6e8b3be407200a7e3c1.tar.gz
Enhanced memory handling within the delta-stream parsing method. Removed the base delta chunk vector, which was a reminder of old (python) times which are long gone
-rw-r--r--_fun.c88
-rw-r--r--fun.py4
2 files changed, 60 insertions, 32 deletions
diff --git a/_fun.c b/_fun.c
index 1186c6d..142795e 100644
--- a/_fun.c
+++ b/_fun.c
@@ -198,46 +198,62 @@ typedef struct {
Py_ssize_t reserved_size; // Reserve in DeltaChunks
} DeltaChunkVector;
-/*
-Grow the delta chunk list by the given amount of bytes.
-This may trigger a realloc, but will do nothing if the reserved size is already
-large enough.
-Return 1 on success, 0 on failure
-*/
+
+
+// Reserve enough memory to hold the given amount of delta chunks
+// Return 1 on success; an allocation failure is fatal (Py_FatalError)
inline
-int DCV_grow(DeltaChunkVector* vec, uint num_dc)
+int DCV_reserve_memory(DeltaChunkVector* vec, uint num_dc)
{
- const uint grow_by_chunks = (vec->size + num_dc) - vec->reserved_size;
- if (grow_by_chunks <= 0){
+ if (num_dc <= vec->reserved_size){
return 1;
}
+#ifdef DEBUG
+ bool was_null = vec->mem == NULL;
+#endif
+
if (vec->mem == NULL){
- vec->mem = PyMem_Malloc(grow_by_chunks * sizeof(DeltaChunk));
+ vec->mem = PyMem_Malloc(num_dc * sizeof(DeltaChunk));
} else {
- vec->mem = PyMem_Realloc(vec->mem, (vec->reserved_size + grow_by_chunks) * sizeof(DeltaChunk));
+ vec->mem = PyMem_Realloc(vec->mem, num_dc * sizeof(DeltaChunk));
}
if (vec->mem == NULL){
Py_FatalError("Could not allocate memory for append operation");
}
- vec->reserved_size = vec->reserved_size + grow_by_chunks;
+ vec->reserved_size = num_dc;
#ifdef DEBUG
- fprintf(stderr, "Allocated %i bytes at %p, to hold up to %i chunks\n", (int)((vec->reserved_size + grow_by_chunks) * sizeof(DeltaChunk)), vec->mem, (int)(vec->reserved_size + grow_by_chunks));
+ const char* format = "Allocated %i bytes at %p, to hold up to %i chunks\n";
+ if (!was_null)
+ format = "Re-allocated %i bytes at %p, to hold up to %i chunks\n";
+ fprintf(stderr, format, (int)(vec->reserved_size * sizeof(DeltaChunk)), vec->mem, (int)vec->reserved_size);
#endif
return vec->mem != NULL;
}
+/*
+Grow the delta chunk list's reserved memory by the given number of chunks (not bytes).
+This may trigger a realloc, but will do nothing if the reserved size is already
+large enough.
+Return 1 on success, 0 on failure
+*/
+inline
+int DCV_grow_by(DeltaChunkVector* vec, uint num_dc)
+{
+ return DCV_reserve_memory(vec, vec->reserved_size + num_dc);
+}
+
int DCV_init(DeltaChunkVector* vec, ull initial_size)
{
vec->mem = NULL;
vec->size = 0;
vec->reserved_size = 0;
- return DCV_grow(vec, initial_size);
+ return DCV_grow_by(vec, initial_size);
}
inline
@@ -309,6 +325,24 @@ void DCV_forget_members(DeltaChunkVector* vec)
vec->size = 0;
}
+// Reset the vector so that its size will be zero, and its members will
+// have been deallocated properly.
+// It will keep its memory though, and hence can be filled again
+inline
+void DCV_reset(DeltaChunkVector* vec)
+{
+ if (vec->size == 0)
+ return;
+
+ DeltaChunk* dc = vec->mem;
+ DeltaChunk* dcend = DCV_end(vec);
+ for(;dc < dcend; dc++){
+ DC_destroy(dc);
+ }
+
+ vec->size = 0;
+}
+
// Append num-chunks to the end of the list, possibly reallocating existing ones
// Return a pointer to the first of the added items. They are already null initialized
// If num-chunks == 0, it returns the end pointer of the allocated memory
@@ -316,7 +350,7 @@ static inline
DeltaChunk* DCV_append_multiple(DeltaChunkVector* vec, uint num_chunks)
{
if (vec->size + num_chunks > vec->reserved_size){
- DCV_grow(vec, (vec->size + num_chunks) - vec->reserved_size);
+ DCV_grow_by(vec, (vec->size + num_chunks) - vec->reserved_size);
}
Py_FatalError("Could not allocate memory for append operation");
Py_ssize_t old_size = vec->size;
@@ -337,7 +371,7 @@ static inline
DeltaChunk* DCV_append(DeltaChunkVector* vec)
{
if (vec->size + 1 > vec->reserved_size){
- DCV_grow(vec, 1);
+ DCV_grow_by(vec, 1);
}
DeltaChunk* next = vec->mem + vec->size;
@@ -546,12 +580,10 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
stream_iter = dstreams;
}
- DeltaChunkVector bdcv;
- DeltaChunkVector tdcv;
DeltaChunkVector dcv;
+ DeltaChunkVector tdcv;
DeltaChunkVector tmpl;
- DCV_init(&bdcv, 0);
- DCV_init(&dcv, 0);
+ DCV_init(&dcv, 100); // should be enough to keep the average text file
DCV_init(&tdcv, 0);
DCV_init(&tmpl, 200);
@@ -578,7 +610,7 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
// estimate number of ops - assume one third adds, half two byte (size+offset) copies
const uint approx_num_cmds = (dlen / 3) + (((dlen / 3) * 2) / (2+2+1));
- DCV_grow(&dcv, approx_num_cmds);
+ DCV_reserve_memory(&dcv, approx_num_cmds);
// parse command stream
ull tbw = 0; // Amount of target bytes written
@@ -632,16 +664,15 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
if (!is_first_run){
DCV_connect_with_base(&tdcv, &dcv, &tmpl);
}
- // swap the vector
- // Skip the first vector, as it is also used as top chunk vector
- if (bdcv.mem != tdcv.mem){
- DCV_destroy(&bdcv);
- }
- bdcv = dcv;
+
if (is_first_run){
tdcv = dcv;
+ // wipe out dcv without destroying the members, get its own memory
+ DCV_init(&dcv, tdcv.size);
+ } else {
+ // destroy members, but keep memory
+ DCV_reset(&dcv);
}
- DCV_init(&dcv, 0);
loop_end:
// perform cleanup
@@ -662,7 +693,6 @@ loop_end:
}
DCV_destroy(&tmpl);
- DCV_destroy(&bdcv);
if (dsi > 1){
// otherwise dcv equals tcl
DCV_destroy(&dcv);
diff --git a/fun.py b/fun.py
index e6262b4..13a3c62 100644
--- a/fun.py
+++ b/fun.py
@@ -545,7 +545,6 @@ def connect_deltas(dstreams):
:param dstreams: iterable of delta stream objects, the delta to be applied last
comes first, then all its ancestors in order
:return: DeltaChunkList, containing all operations to apply"""
- bdcl = None # data chunk list for initial base
tdcl = None # topmost dcl
dcl = tdcl = TopdownDeltaChunkList()
@@ -611,13 +610,12 @@ def connect_deltas(dstreams):
dcl.compress()
# merge the lists !
- if bdcl is not None:
+ if dsi > 0:
if not tdcl.connect_with_next_base(dcl):
break
# END handle merge
# prepare next base
- bdcl = dcl
dcl = DeltaChunkList()
# END for each delta stream