summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsasha@mysql.sashanet.com <>2001-05-11 15:07:34 -0600
committersasha@mysql.sashanet.com <>2001-05-11 15:07:34 -0600
commitc706bf40f3723e4224844a52f632529fda3412ca (patch)
treec1011c976487f613ded4fa7db34dbff547084d18
parenta12117f0364761f89d6fdb015ece9ada2d1ce494 (diff)
downloadmariadb-git-c706bf40f3723e4224844a52f632529fda3412ca.tar.gz
use tree for count(distinct) when possible
-rw-r--r--include/my_tree.h2
-rw-r--r--mysys/tree.c2
-rw-r--r--sql/item_sum.cc103
-rw-r--r--sql/item_sum.h15
4 files changed, 117 insertions, 5 deletions
diff --git a/include/my_tree.h b/include/my_tree.h
index c0950409588..d4d28644a39 100644
--- a/include/my_tree.h
+++ b/include/my_tree.h
@@ -57,7 +57,7 @@ typedef struct st_tree {
void (*free)(void *);
} TREE;
- /* Functions on hole tree */
+ /* Functions on whole tree */
void init_tree(TREE *tree,uint default_alloc_size, int element_size,
qsort_cmp2 compare, my_bool with_delete,
void (*free_element)(void*));
diff --git a/mysys/tree.c b/mysys/tree.c
index db2b3989b81..e46ff00adad 100644
--- a/mysys/tree.c
+++ b/mysys/tree.c
@@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size,
((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1))))
{
tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */
- /* Fix allocation size so that we don't loose any memory */
+ /* Fix allocation size so that we don't lose any memory */
default_alloc_size/=(sizeof(TREE_ELEMENT)+size);
if (!default_alloc_size)
default_alloc_size=1;
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 375ba081f80..da0bc71ca1f 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str)
#include "sql_select.h"
+static int simple_raw_key_cmp(void* arg, byte* key1, byte* key2)
+{
+ return memcmp(key1, key2, (int)arg);
+}
+
+static int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
+{
+ return my_sortcmp(key1, key2, (int)arg);
+}
+
+// did not make this one static - at least gcc gets confused when
+// I try to declare a static function as a friend. If you can figure
+// out the syntax to make a static function a friend, make this one
+// static
+int composite_key_cmp(void* arg, byte* key1, byte* key2)
+{
+ Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
+ Field** field = item->table->field, **field_end;
+ field_end = field + item->table->fields;
+ for(; field < field_end; ++field)
+ {
+ int res;
+ int len = (*field)->field_length;
+ switch((*field)->type())
+ {
+ case FIELD_TYPE_STRING:
+ case FIELD_TYPE_VAR_STRING:
+ res = my_sortcmp(key1, key2, len);
+ break;
+ default:
+ res = memcmp(key1, key2, len);
+ break;
+ }
+ if(res)
+ return res;
+ key1 += len;
+ key2 += len;
+ }
+ return 0;
+}
+
+
+
Item_sum_count_distinct::~Item_sum_count_distinct()
{
if (table)
free_tmp_table(current_thd, table);
delete tmp_table_param;
+ if(use_tree)
+ delete_tree(&tree);
}
@@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd)
0, 0, current_lex->options | thd->options)))
return 1;
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
+
+ if(table->db_type == DB_TYPE_HEAP) // no blobs, otherwise it would be
+ // MyISAM
+ {
+ qsort_cmp2 compare_key;
+ void* cmp_arg;
+ int key_len;
+
+ if(table->fields == 1) // if we have only one field, which is
+ // the most common use of count(distinct), it is much faster
+ // to use a simpler key compare method that can take advantage
+ // of not having to worry about other fields
+ {
+ switch(table->field[0]->type())
+ {
+ // if we have a string, we must take care of charsets
+ // and case sensitivity
+ case FIELD_TYPE_STRING:
+ case FIELD_TYPE_VAR_STRING:
+ compare_key = (qsort_cmp2)simple_str_key_cmp;
+ break;
+ default: // since at this point we cannot have blobs
+ // anything else can be compared with memcmp
+ compare_key = (qsort_cmp2)simple_raw_key_cmp;
+ break;
+ }
+ cmp_arg = (void*)(key_len = table->field[0]->field_length);
+ rec_offset = 1;
+ }
+ else // too bad, cannot cheat - there is more than one field
+ {
+ cmp_arg = (void*)this;
+ compare_key = (qsort_cmp2)composite_key_cmp;
+ Field** field, **field_end;
+ field_end = (field = table->field) + table->fields;
+ for(key_len = 0; field < field_end; ++field)
+ {
+ key_len += (*field)->field_length;
+ }
+ rec_offset = table->reclength - key_len;
+ }
+
+ init_tree(&tree, 0, key_len, compare_key, 0, 0);
+ tree.cmp_arg = cmp_arg;
+ use_tree = 1;
+ }
+
return 0;
}
@@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset()
table->file->extra(HA_EXTRA_NO_CACHE);
table->file->delete_all_rows();
table->file->extra(HA_EXTRA_WRITE_CACHE);
+ if(use_tree)
+ delete_tree(&tree);
(void) add();
}
@@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add()
if ((*field)->is_real_null(0))
return 0; // Don't count NULL
- if ((error=table->file->write_row(table->record[0])))
+ if(use_tree)
+ {
+ if(!tree_insert(&tree, table->record[0] + rec_offset, 0))
+ return 1;
+ }
+ else if ((error=table->file->write_row(table->record[0])))
{
if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
@@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int()
{
if (!table) // Empty query
return LL(0);
+ if(use_tree)
+ return tree.elements_in_tree;
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
return table->file->records;
}
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 8cb09c85623..863623c3b1a 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -21,6 +21,8 @@
#pragma interface /* gcc class implementation */
#endif
+#include <my_tree.h>
+
class Item_sum :public Item_result_field
{
public:
@@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int
table_map used_table_cache;
bool fix_fields(THD *thd,TABLE_LIST *tables);
TMP_TABLE_PARAM *tmp_table_param;
-
+ TREE tree;
+ bool use_tree; // If there are no blobs, we can use a tree, which
+ // is faster than heap table. In that case, we still use the table
+ // to help get things set up, but we insert nothing in it
+ int rec_offset; // the first few bytes of record ( at least one)
+ // are just markers for deleted and NULLs. We want to skip them since
+ // they will just bloat the tree without providing any valuable info
+
+ friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
+
public:
Item_sum_count_distinct(List<Item> &list)
:Item_sum_int(list),table(0),used_table_cache(~(table_map) 0),
- tmp_table_param(0)
+ tmp_table_param(0),use_tree(0)
{ quick_group=0; }
~Item_sum_count_distinct();
table_map used_tables() const { return used_table_cache; }