summaryrefslogtreecommitdiff
path: root/src/search.cc
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-01-06 11:43:41 +0000
committerBruno Haible <bruno@clisp.org>2003-01-06 11:43:41 +0000
commitb91e4511c018268b66ea31beda0bb631c5cfd4b7 (patch)
tree3422843e3cf367a31f098114cada37839a4b56a0 /src/search.cc
parent72a3884ff96fc59b0df208efecfc50cfcd581c62 (diff)
downloadgperf-b91e4511c018268b66ea31beda0bb631c5cfd4b7.tar.gz
Continuing rework.
Diffstat (limited to 'src/search.cc')
-rw-r--r--src/search.cc72
1 files changed, 38 insertions, 34 deletions
diff --git a/src/search.cc b/src/search.cc
index 304da49..773e2ed 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -356,7 +356,7 @@ Search::reorder ()
}
}
-/* Returns the length of entire key list. */
+/* Returns the length of keyword list. */
int
Search::keyword_list_length ()
@@ -364,7 +364,7 @@ Search::keyword_list_length ()
return _list_len;
}
-/* Returns length of longest key read. */
+/* Returns the maximum length of keywords. */
int
Search::max_key_length ()
@@ -372,7 +372,7 @@ Search::max_key_length ()
return _max_key_len;
}
-/* Returns number of key positions. */
+/* Returns the number of key positions. */
int
Search::get_max_keysig_size ()
@@ -380,34 +380,44 @@ Search::get_max_keysig_size ()
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
}
-/* Generate a key set's hash value. */
+/* Computes a keyword's hash value, relative to the current _asso_values[],
+ and stores it in keyword->_hash_value.
+ This is called very frequently, and needs to be fast! */
inline int
-Search::hash (KeywordExt *key_node)
+Search::compute_hash (KeywordExt *keyword)
{
- int sum = option[NOLENGTH] ? 0 : key_node->_allchars_length;
+ int sum = option[NOLENGTH] ? 0 : keyword->_allchars_length;
- const unsigned char *p = key_node->_selchars;
- int i = key_node->_selchars_length;
+ const unsigned char *p = keyword->_selchars;
+ int i = keyword->_selchars_length;
for (; i > 0; p++, i--)
sum += _asso_values[*p];
- return key_node->_hash_value = sum;
+ return keyword->_hash_value = sum;
}
-/* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets.
- (In a multiset, an element can occur multiple times.)
- Precondition: both set_1 and set_2 must be ordered. Returns the length
- of the combined set. */
+/* Computes the disjoint union of two multisets of characters, i.e.
+ the set of characters that are contained with a different multiplicity
+ in set_1 and set_2. This includes those characters which are contained
+ in one of the sets but not both.
+ Both sets set_1[0..size_1-1] and set_2[0..size_2-1] are given ordered.
+ The result, an ordered set (not multiset!) is stored in set_3[0...].
+ Returns the size of the resulting set. */
inline int
-Search::compute_disjoint_union (const unsigned char *set_1, int size_1, const unsigned char *set_2, int size_2, unsigned char *set_3)
+compute_disjoint_union (const unsigned char *set_1, int size_1,
+ const unsigned char *set_2, int size_2,
+ unsigned char *set_3)
{
unsigned char *base = set_3;
while (size_1 > 0 && size_2 > 0)
if (*set_1 == *set_2)
- set_1++, size_1--, set_2++, size_2--;
+ {
+ set_1++, size_1--;
+ set_2++, size_2--;
+ }
else
{
unsigned char next;
@@ -437,27 +447,23 @@ Search::compute_disjoint_union (const unsigned char *set_1, int size_1, const u
return set_3 - base;
}
-/* Sort the UNION_SET in increasing frequency of occurrence.
- This speeds up later processing since we may assume the resulting
- set (Set_3, in this case), is ordered. Uses insertion sort, since
- the UNION_SET is typically short. */
+/* Sorts the given set in increasing frequency of _occurrences[]. */
inline void
-Search::sort_set (unsigned char *union_set, int len)
+Search::sort_by_occurrence (unsigned char *set, int len)
{
- int i, j;
-
- for (i = 0, j = len - 1; i < j; i++)
+ /* Use bubble sort, since the set is typically short. */
+ for (int i = 1; i < len; i++)
{
int curr;
unsigned char tmp;
- for (curr = i + 1, tmp = union_set[curr];
- curr > 0 && _occurrences[tmp] < _occurrences[union_set[curr-1]];
+ for (curr = i, tmp = set[curr];
+ curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]];
curr--)
- union_set[curr] = union_set[curr - 1];
+ set[curr] = set[curr - 1];
- union_set[curr] = tmp;
+ set[curr] = tmp;
}
}
@@ -493,7 +499,7 @@ Search::affects_prev (unsigned char c, KeywordExt *curr)
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{
KeywordExt *keyword = ptr->first();
- if (_collision_detector->set_bit (hash (keyword))
+ if (_collision_detector->set_bit (compute_hash (keyword))
&& ++collisions >= _fewest_collisions)
break;
if (keyword == curr)
@@ -533,7 +539,7 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
fflush (stderr);
}
union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set);
- sort_set (union_set, union_set_length);
+ sort_by_occurrence (union_set, union_set_length);
/* Try changing some values, if change doesn't alter other values continue normal action. */
_fewest_collisions++;
@@ -555,13 +561,11 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{
KeywordExt* keyword = ptr->first();
+ compute_hash (keyword);
if (keyword == curr)
break;
- hash (keyword);
}
- hash (curr);
-
if (option[DEBUG])
{
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
@@ -628,7 +632,7 @@ Search::optimize ()
{
KeywordExt *currkw = curr->first();
- hash (currkw);
+ compute_hash (currkw);
for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest())
{
@@ -649,7 +653,7 @@ Search::optimize ()
for (curr = _head; curr; curr = curr->rest())
{
- unsigned int hashcode = hash (curr->first());
+ unsigned int hashcode = compute_hash (curr->first());
if (_collision_detector->set_bit (hashcode))
{
if (option[DUP]) /* Keep track of this number... */