summaryrefslogtreecommitdiff
path: root/tables
diff options
context:
space:
mode:
authorbrianp <brianp@13f79535-47bb-0310-9956-ffa450edef68>2003-06-22 21:50:25 +0000
committerbrianp <brianp@13f79535-47bb-0310-9956-ffa450edef68>2003-06-22 21:50:25 +0000
commit9c2c1db6a31714af01e5fff5ae9ae0cc26c4181f (patch)
treee3361b0021e73f8ad170620412219ee108d51e44 /tables
parente85a1a2df885c4da70251a1158c1124000a5dda0 (diff)
downloadlibapr-9c2c1db6a31714af01e5fff5ae9ae0cc26c4181f.tar.gz
Add an apr_table_compress() function to reconcile duplicate
entries in a table, and replace the red-black trees in apr_table_overlap() with a less complex mergesort Submitted by: Joe Schaefer <joe+gmane@sunstarsys.com> Reviewed by: Brian Pane git-svn-id: http://svn.apache.org/repos/asf/apr/apr/trunk@64547 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'tables')
-rw-r--r--tables/apr_tables.c500
1 files changed, 208 insertions, 292 deletions
diff --git a/tables/apr_tables.c b/tables/apr_tables.c
index 2f1ae128a..885a28220 100644
--- a/tables/apr_tables.c
+++ b/tables/apr_tables.c
@@ -996,334 +996,250 @@ APR_DECLARE(int) apr_table_vdo(apr_table_do_callback_fn_t *comp,
return vdorv;
}
-/* During apr_table_overlap(), we build an overlap key for
- * each element in the two tables.
- */
-#define RED 0
-#define BLACK 1
-typedef struct overlap_key {
- /* The table element */
- apr_table_entry_t *elt;
-
- /* 0 if from table 'a', 1 if from table 'b' */
- int level;
-
- /* Whether to omit this element when building the result table */
- int skip;
-
- /* overlap_keys can be assembled into a red-black tree */
- int black;
- struct overlap_key *tree_parent;
- struct overlap_key *tree_left;
- struct overlap_key *tree_right;
- int color;
-
- /* List of multiple values for this key */
- struct overlap_key *merge_next;
- struct overlap_key *merge_last;
-} overlap_key;
-
-/* Rotate a subtree of a red-black tree */
-static void rotate_counterclockwise(overlap_key **root,
- overlap_key *rotate_node)
-{
- overlap_key *child = rotate_node->tree_right;
- rotate_node->tree_right = child->tree_left;
- if (rotate_node->tree_right) {
- rotate_node->tree_right->tree_parent = rotate_node;
- }
- child->tree_parent = rotate_node->tree_parent;
- if (child->tree_parent == NULL) {
- *root = child;
- }
- else {
- if (rotate_node == rotate_node->tree_parent->tree_left) {
- rotate_node->tree_parent->tree_left = child;
- }
- else {
- rotate_node->tree_parent->tree_right = child;
- }
- }
- child->tree_left = rotate_node;
- rotate_node->tree_parent = child;
-}
-
-static void rotate_clockwise(overlap_key **root, overlap_key *rotate_node)
+static apr_table_entry_t **table_mergesort(apr_pool_t *pool,
+ apr_table_entry_t **values, int n)
{
- overlap_key *child = rotate_node->tree_left;
- rotate_node->tree_left = child->tree_right;
- if (rotate_node->tree_left) {
- rotate_node->tree_left->tree_parent = rotate_node;
- }
- child->tree_parent = rotate_node->tree_parent;
- if (child->tree_parent == NULL) {
- *root = child;
- }
- else {
- if (rotate_node == rotate_node->tree_parent->tree_left) {
- rotate_node->tree_parent->tree_left = child;
- }
- else {
- rotate_node->tree_parent->tree_right = child;
+ /* Bottom-up mergesort, based on design in Sedgewick's "Algorithms
+ * in C," chapter 8
+ */
+ apr_table_entry_t **values_tmp =
+ (apr_table_entry_t **)apr_palloc(pool, n * sizeof(apr_table_entry_t*));
+ int i;
+ int blocksize;
+
+ /* First pass: sort pairs of elements (blocksize=1) */
+ for (i = 0; i + 1 < n; i += 2) {
+ if (strcasecmp(values[i]->key, values[i + 1]->key) > 0) {
+ apr_table_entry_t *swap = values[i];
+ values[i] = values[i + 1];
+ values[i + 1] = swap;
}
}
- child->tree_right = rotate_node;
- rotate_node->tree_parent = child;
-}
+ /* Merge successively larger blocks */
+ blocksize = 2;
+ while (blocksize < n) {
+ apr_table_entry_t **dst = values_tmp;
+ int next_start;
+ apr_table_entry_t **swap;
-static void overlap_hash(overlap_key *elt,
- overlap_key **hash_table, int nhash,
- unsigned flags)
-{
- /* Each bucket in the hash table holds a red-black tree
- * containing the overlap_keys that hash into that bucket
- */
- overlap_key **child = &(hash_table[elt->elt->key_checksum & (nhash - 1)]);
- overlap_key **root = child;
- overlap_key *parent = NULL;
- overlap_key *next;
-
- /* Look for the element in the tree */
- while ((next = *child) != NULL) {
- int direction = strcasecmp(elt->elt->key, next->elt->key);
- if (direction < 0) {
- parent = next;
- child = &(next->tree_left);
- }
- else if (direction > 0) {
- parent = next;
- child = &(next->tree_right);
- }
- else {
- /* This is the key we're looking for */
- if (flags == APR_OVERLAP_TABLES_MERGE) {
- /* Just link this node at the end of the list
- * of values for the key. It doesn't need to
- * be linked into the tree, because the node at
- * the head of this key's value list is in the
- * tree already.
- */
- elt->skip = 1;
- elt->merge_next = NULL;
- if (next->merge_last) {
- next->merge_last->merge_next = elt;
- }
- else {
- next->merge_next = elt;
- }
- next->merge_last = elt;
+ /* Merge consecutive pairs blocks of the next blocksize.
+ * Within a block, elements are in sorted order due to
+ * the previous iteration.
+ */
+ for (next_start = 0; next_start + blocksize < n;
+ next_start += (blocksize + blocksize)) {
+
+ int block1_start = next_start;
+ int block2_start = block1_start + blocksize;
+ int block1_end = block2_start;
+ int block2_end = block2_start + blocksize;
+ if (block2_end > n) {
+ /* The last block may be smaller than blocksize */
+ block2_end = n;
}
- else {
- /* In the "set" case, don't bother storing
- * this value in the tree if it's already
- * there, except if the previous value was
- * from table 'a' (level==0) and this value
- * is from table 'b' (level==1)
+ for (;;) {
+
+ /* Merge the next two blocks:
+ * Pick the smaller of the next element from
+ * block 1 and the next element from block 2.
+ * Once either of the blocks is emptied, copy
+ * over all the remaining elements from the
+ * other block
*/
- if (elt->level > next->level) {
- elt->tree_left = next->tree_left;
- if (next->tree_left) {
- next->tree_left->tree_parent = elt;
+ apr_table_entry_t *lowest;
+ if (block1_start == block1_end) {
+ for (; block2_start < block2_end; block2_start++) {
+ *dst++ = values[block2_start];
}
- elt->tree_right = next->tree_right;
- if (next->tree_right) {
- next->tree_right->tree_parent = elt;
+ break;
+ }
+ else if (block2_start == block2_end) {
+ for (; block1_start < block1_end; block1_start++) {
+ *dst++ = values[block1_start];
}
- elt->tree_parent = next->tree_parent;
- elt->color = next->color;
- (*child) = elt;
- elt->merge_next = NULL;
- elt->merge_last = NULL;
- elt->skip = 0;
- next->skip = 1;
+ break;
+ }
+ if (strcasecmp(values[block1_start]->key,
+ values[block2_start]->key) > 0) {
+ *dst++ = values[block2_start++];
}
else {
- elt->skip = 1;
+ *dst++ = values[block1_start++];
}
}
- return;
}
+
+ /* If n is not a multiple of 2*blocksize, some elements
+ * will be left over at the end of the array.
+ */
+ for (i = dst - values_tmp; i < n; i++) {
+ values_tmp[i] = values[i];
+ }
+
+ /* The output array of this pass becomes the input
+ * array of the next pass, and vice versa
+ */
+ swap = values_tmp;
+ values_tmp = values;
+ values = swap;
+
+ blocksize += blocksize;
}
- /* The element wasn't in the tree, so add it */
- elt->tree_left = NULL;
- elt->tree_right = NULL;
- elt->tree_parent = parent;
- (*child) = elt;
- elt->merge_next = NULL;
- elt->merge_last = NULL;
- elt->skip = 0;
- elt->color = RED;
-
- /* Shuffle the nodes to maintain the red-black tree's balanced
- * shape property. (This is what guarantees O(n*log(n)) worst-case
- * performance for apr_table_overlap().)
+ return values;
+}
+
+APR_DECLARE(void) apr_table_compress(apr_table_t *t, unsigned flags)
+{
+ apr_table_entry_t **sort_array;
+ apr_table_entry_t **sort_next;
+ apr_table_entry_t **sort_end;
+ apr_table_entry_t *table_next;
+ apr_table_entry_t **last;
+ int i;
+ int dups_found;
+
+ if (t->a.nelts <= 1) {
+ return;
+ }
+
+ /* Copy pointers to all the table elements into an
+ * array and sort to allow for easy detection of
+ * duplicate keys
*/
- next = elt;
- while ((next->tree_parent) && (next->tree_parent->color == RED)) {
- /* Note: Root is always black, and red and black nodes
- * alternate on any path down the tree. So if we're inside
- * this block, the grandparent node is non-NULL.
- */
- overlap_key *grandparent = next->tree_parent->tree_parent;
- if (next->tree_parent == grandparent->tree_left) {
- overlap_key *parent_sibling = grandparent->tree_right;
- if (parent_sibling && (parent_sibling->color == RED)) {
- next->tree_parent->color = BLACK;
- parent_sibling->color = BLACK;
- grandparent->color = RED;
- next = grandparent;
+ sort_array = (apr_table_entry_t **)
+ apr_palloc(t->a.pool, t->a.nelts * sizeof(apr_table_entry_t*));
+ sort_next = sort_array;
+ table_next = (apr_table_entry_t *)t->a.elts;
+ i = t->a.nelts;
+ do {
+ *sort_next++ = table_next++;
+ } while (--i);
+
+ /* Note: the merge is done with mergesort instead of quicksort
+ * because mergesort is a stable sort and runs in n*log(n)
+ * time regardless of its inputs (quicksort is quadratic in
+ * the worst case)
+ */
+ sort_array = table_mergesort(t->a.pool, sort_array, t->a.nelts);
+
+ /* Process any duplicate keys */
+ dups_found = 0;
+ sort_next = sort_array;
+ sort_end = sort_array + t->a.nelts;
+ last = sort_next;
+ while (++sort_next < sort_end) {
+ if (((*sort_next)->key_checksum == (*last)->key_checksum) &&
+ !strcasecmp((*sort_next)->key, (*last)->key)) {
+ apr_table_entry_t **dup_last = sort_next + 1;
+ dups_found = 1;
+ while ((dup_last < sort_end) &&
+ ((*dup_last)->key_checksum == (*last)->key_checksum) &&
+ !strcasecmp((*dup_last)->key, (*last)->key)) {
+ dup_last++;
}
- else {
- if (next == next->tree_parent->tree_right) {
- next = next->tree_parent;
- rotate_counterclockwise(root, next);
+ dup_last--; /* Elements from last through dup_last, inclusive,
+ * all have the same key
+ */
+ if (flags == APR_OVERLAP_TABLES_MERGE) {
+ apr_size_t len = 0;
+ apr_table_entry_t **next = last;
+ char *new_val;
+ char *val_dst;
+ do {
+ len += strlen((*next)->val);
+ len += 2; /* for ", " or trailing null */
+ } while (++next <= dup_last);
+ new_val = (char *)apr_palloc(t->a.pool, len);
+ val_dst = new_val;
+ next = last;
+ for (;;) {
+ strcpy(val_dst, (*next)->val);
+ val_dst += strlen((*next)->val);
+ next++;
+ if (next > dup_last) {
+ *val_dst = 0;
+ break;
+ }
+ else {
+ *val_dst++ = ',';
+ *val_dst++ = ' ';
+ }
}
- next->tree_parent->color = BLACK;
- next->tree_parent->tree_parent->color = RED;
- rotate_clockwise(root, next->tree_parent->tree_parent);
+ (*last)->val = new_val;
+ }
+ else { /* overwrite */
+ (*last)->val = (*dup_last)->val;
}
+ do {
+ (*sort_next)->key = NULL;
+ } while (++sort_next <= dup_last);
}
else {
- overlap_key *parent_sibling = grandparent->tree_left;
- if (parent_sibling && (parent_sibling->color == RED)) {
- next->tree_parent->color = BLACK;
- parent_sibling->color = BLACK;
- grandparent->color = RED;
- next = grandparent;
+ last = sort_next;
+ }
+ }
+
+ /* Shift elements to the left to fill holes left by removing duplicates */
+ if (dups_found) {
+ apr_table_entry_t *src = (apr_table_entry_t *)t->a.elts;
+ apr_table_entry_t *dst = (apr_table_entry_t *)t->a.elts;
+ apr_table_entry_t *last_elt = src + t->a.nelts;
+ do {
+ if (src->key) {
+ *dst++ = *src;
}
- else {
- if (next == next->tree_parent->tree_left) {
- next = next->tree_parent;
- rotate_clockwise(root, next);
- }
- next->tree_parent->color = BLACK;
- next->tree_parent->tree_parent->color = RED;
- rotate_counterclockwise(root, next->tree_parent->tree_parent);
+ } while (++src < last_elt);
+ t->a.nelts -= (last_elt - dst);
+ }
+
+ table_reindex(t);
+}
+
+APR_DECLARE(void) apr_table_cat(apr_table_t *t, const apr_table_t *s)
+{
+ const int n = t->a.nelts;
+ register int idx;
+
+ apr_array_cat(&t->a,&s->a);
+
+ if (n == 0) {
+ memcpy(t->index_first,s->index_first,sizeof(int) * TABLE_HASH_SIZE);
+ memcpy(t->index_last, s->index_last, sizeof(int) * TABLE_HASH_SIZE);
+ t->index_initialized = s->index_initialized;
+ return;
+ }
+
+ for (idx = 0; idx < TABLE_HASH_SIZE; ++idx) {
+ if (TABLE_INDEX_IS_INITIALIZED(s, idx)) {
+ t->index_last[idx] = s->index_last[idx] + n;
+ if (!TABLE_INDEX_IS_INITIALIZED(t, idx)) {
+ t->index_first[idx] = s->index_first[idx] + n;
}
}
}
- (*root)->color = BLACK;
-}
-/* Must be a power of 2 */
-#define DEFAULT_HASH_SIZE 16
+ t->index_initialized |= s->index_initialized;
+}
APR_DECLARE(void) apr_table_overlap(apr_table_t *a, const apr_table_t *b,
unsigned flags)
{
- int max_keys;
- int nkeys;
- overlap_key *cat_keys; /* concatenation of the keys of a and b */
- overlap_key **hash_table;
- int nhash;
- int i;
- apr_table_entry_t *elts;
- apr_table_entry_t *dst_elt;
+ const int m = a->a.nelts;
+ const int n = b->a.nelts;
+ apr_pool_t *p = b->a.pool;
- max_keys = a->a.nelts + b->a.nelts;
- if (!max_keys) {
- /* The following logic won't do anything harmful if we keep
- * going in this situation, but
- *
- * 1) certain memory debuggers don't like an alloc size of 0
- * so we'd like to avoid that...
- * 2) this isn't all that rare a call anyway, so it is useful
- * to skip the storage allocation and other checks in the
- * following logic
- */
+ if (m + n == 0) {
return;
}
- cat_keys = apr_palloc(b->a.pool, sizeof(overlap_key) * max_keys);
- nhash = DEFAULT_HASH_SIZE;
- while (nhash < max_keys) {
- nhash <<= 1;
- }
- hash_table = (overlap_key **)apr_pcalloc(b->a.pool,
- sizeof(overlap_key *) * nhash);
-
- /* The cat_keys array contains an element for each entry in a,
- * followed by one for each in b. While populating this array,
- * we also use it as:
- * 1) a hash table, to detect matching keys, and
- * 2) a linked list of multiple values for a given key (in the
- * APR_OVERLAP_TABLES_MERGE case)
- */
- /* First, the elements of a */
- nkeys = 0;
- elts = (apr_table_entry_t *)a->a.elts;
- for (i = 0; i < a->a.nelts; i++, nkeys++) {
- cat_keys[nkeys].elt = &(elts[i]);
- cat_keys[nkeys].level = 0;
- overlap_hash(&(cat_keys[nkeys]), hash_table, nhash, flags);
+ /* copy (extend) a using b's pool */
+ if (a->a.pool != p) {
+ make_array_core(&a->a, p, m+n, sizeof(apr_table_entry_t), 0);
}
- /* Then the elements of b */
- elts = (apr_table_entry_t *)b->a.elts;
- for (i = 0; i < b->a.nelts; i++, nkeys++) {
- cat_keys[nkeys].elt = &(elts[i]);
- cat_keys[nkeys].level = 1;
- overlap_hash(&(cat_keys[nkeys]), hash_table, nhash, flags);
- }
+ apr_table_cat(a, b);
- /* Copy concatenated list of elements into table a to
- * form the new table contents, but:
- * 1) omit the ones marked "skip," and
- * 2) merge values for the same key if needed
- */
- make_array_core(&a->a, b->a.pool, max_keys, sizeof(apr_table_entry_t), 0);
- nkeys = 0;
- dst_elt = (apr_table_entry_t *)a->a.elts;
- for (i = 0; i < max_keys; i++) {
- if (cat_keys[i].skip) {
- continue;
- }
- if (cat_keys[i].merge_next) {
- char *new_val;
- char *val_next;
- overlap_key *next = cat_keys[i].merge_next;
- int len = (cat_keys[i].elt->val ?
- strlen(cat_keys[i].elt->val) : 0);
- do {
- len += 2;
- if (next->elt->val) {
- len += strlen(next->elt->val);
- }
- next = next->merge_next;
- } while (next);
- len++;
- new_val = (char *)apr_palloc(b->a.pool, len);
- val_next = new_val;
- if (cat_keys[i].elt->val) {
- strcpy(val_next, cat_keys[i].elt->val);
- val_next += strlen(cat_keys[i].elt->val);
- }
- next = cat_keys[i].merge_next;
- do {
- *val_next++ = ',';
- *val_next++ = ' ';
- if (next->elt->val) {
- strcpy(val_next, next->elt->val);
- val_next += strlen(next->elt->val);
- }
- next = next->merge_next;
- } while (next);
- *val_next = 0;
- dst_elt->key = cat_keys[i].elt->key;
- dst_elt->val = new_val;
- dst_elt->key_checksum = cat_keys[i].elt->key_checksum;
- dst_elt++;
- }
- else {
- dst_elt->key = cat_keys[i].elt->key;
- dst_elt->val = cat_keys[i].elt->val;
- dst_elt->key_checksum = cat_keys[i].elt->key_checksum;
- dst_elt++;
- }
- }
- a->a.nelts = dst_elt - (apr_table_entry_t *)a->a.elts;
- table_reindex(a);
+ apr_table_compress(a, flags);
}
-