summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <monty@hundin.mysql.fi>2002-05-17 16:45:00 +0300
committerunknown <monty@hundin.mysql.fi>2002-05-17 16:45:00 +0300
commit387e77d104887d45a4574a9bd1b826cfa20f01c1 (patch)
tree138e0662f05b06e5000a3e7e6fa8383eea492a3b /sql
parent66f426c0635d16f7f594f6bf867e13e5cba18500 (diff)
downloadmariadb-git-387e77d104887d45a4574a9bd1b826cfa20f01c1.tar.gz
Optimize LIKE with turbo-boyer-more algoritm
Docs/manual.texi: Added info about LIKE optimization mysql-test/r/func_like.result: Test of new LIKE optimization mysql-test/t/func_like.test: Test of new LIKE optimization
Diffstat (limited to 'sql')
-rw-r--r--sql/item_cmpfunc.cc266
-rw-r--r--sql/item_cmpfunc.h29
-rw-r--r--sql/unireg.h7
3 files changed, 295 insertions, 7 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 09c2bdc593a..c21ad18f08e 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -1228,23 +1228,23 @@ void Item_func_like::fix_length_and_dec()
// cmp_type=STRING_RESULT; // For quick select
}
-
longlong Item_func_like::val_int()
{
- String *res,*res2;
- res=args[0]->val_str(&tmp_value1);
+ String* res = args[0]->val_str(&tmp_value1);
if (args[0]->null_value)
{
null_value=1;
return 0;
}
- res2=args[1]->val_str(&tmp_value2);
+ String* res2 = args[1]->val_str(&tmp_value2);
if (args[1]->null_value)
{
null_value=1;
return 0;
}
null_value=0;
+ if (canDoTurboBM)
+ return turboBM_matches(res->ptr(), res->length()) ? 1 : 0;
if (binary)
return wild_compare(*res,*res2,escape) ? 0 : 1;
else
@@ -1268,6 +1268,51 @@ Item_func::optimize_type Item_func_like::select_optimize() const
return OPTIMIZE_NONE;
}
+bool Item_func_like::fix_fields(THD *thd,struct st_table_list *tlist)
+{
+ if (Item_bool_func2::fix_fields(thd, tlist))
+ return 1;
+
+ /*
+ TODO--we could do it for non-const, but we'd have to
+ recompute the tables for each row--probably not worth it.
+ */
+ if (args[1]->const_item() && !(specialflag & SPECIAL_NO_NEW_FUNC))
+ {
+ String* res2 = args[1]->val_str(&tmp_value2);
+ const size_t len = res2->length();
+ const char* first = res2->ptr();
+ const char* last = first + len - 1;
+ /*
+ len must be > 2 ('%pattern%')
+ heuristic: only do TurboBM for pattern_len > 2
+ */
+
+ if (len > MIN_TURBOBM_PATTERN_LEN + 2 &&
+ *first == wild_many &&
+ *last == wild_many)
+ {
+ const char* tmp = first + 1;
+ for ( ; *tmp != wild_many && *tmp != wild_one && *tmp != escape; tmp++) ;
+ canDoTurboBM = tmp == last;
+ }
+
+ if (canDoTurboBM)
+ {
+ pattern = first + 1;
+ pattern_len = len - 2;
+ DBUG_PRINT("TurboBM", ("Initializing pattern: '%s'...", first));
+ int* suff = (int*)thd->alloc(sizeof(int[pattern_len + 1]));
+ bmGs = (int*)thd->alloc(sizeof(int[pattern_len + 1]));
+ bmBc = (int*)thd->alloc(sizeof(int[alphabet_size]));
+ turboBM_compute_good_suffix_shifts(suff);
+ turboBM_compute_bad_character_shifts();
+ DBUG_PRINT("turboBM",("done"));
+ }
+ }
+ return 0;
+}
+
#ifdef USE_REGEX
bool
@@ -1307,7 +1352,6 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables)
return 0;
}
-
longlong Item_func_regex::val_int()
{
char buff[MAX_FIELD_WIDTH];
@@ -1364,3 +1408,215 @@ Item_func_regex::~Item_func_regex()
}
#endif /* USE_REGEX */
+
+
+#ifdef LIKE_CMP_TOUPPER
+#define likeconv(A) (uchar) toupper(A)
+#else
+#define likeconv(A) (uchar) my_sort_order[(uchar) (A)]
+#endif
+
+
+/**********************************************************************
+ turboBM_compute_suffixes()
+ Precomputation dependent only on pattern_len.
+**********************************************************************/
+
+void Item_func_like::turboBM_compute_suffixes(int* suff)
+{
+ const int plm1 = pattern_len - 1;
+ int f = 0;
+ int g = plm1;
+ int* const splm1 = suff + plm1;
+
+ *splm1 = pattern_len;
+
+ if (binary)
+ {
+ int i;
+ for (i = pattern_len - 2; i >= 0; i--)
+ {
+ int tmp = *(splm1 + i - f);
+ if (g < i && tmp < i - g)
+ suff[i] = tmp;
+ else
+ {
+ if (i < g)
+ g = i; // g = min(i, g)
+ f = i;
+ while (g >= 0 && pattern[g] == pattern[g + plm1 - f])
+ g--;
+ suff[i] = f - g;
+ }
+ }
+ }
+ else
+ {
+ int i;
+ for (i = pattern_len - 2; 0 <= i; --i)
+ {
+ int tmp = *(splm1 + i - f);
+ if (g < i && tmp < i - g)
+ suff[i] = tmp;
+ else
+ {
+ if (i < g)
+ g = i; // g = min(i, g)
+ f = i;
+ while (g >= 0 && likeconv(pattern[g]) == likeconv(pattern[g + plm1 - f]))
+ g--;
+ suff[i] = f - g;
+ }
+ }
+ }
+}
+
+
+/**********************************************************************
+ turboBM_compute_good_suffix_shifts()
+ Precomputation dependent only on pattern_len.
+**********************************************************************/
+
+void Item_func_like::turboBM_compute_good_suffix_shifts(int* suff)
+{
+ turboBM_compute_suffixes(suff);
+
+ int* end = bmGs + pattern_len;
+ int* k;
+ for (k = bmGs; k < end; k++)
+ *k = pattern_len;
+
+ int tmp;
+ int i;
+ int j = 0;
+ const int plm1 = pattern_len - 1;
+ for (i = plm1; i > -1; i--)
+ {
+ if (suff[i] == i + 1)
+ {
+ for (tmp = plm1 - i; j < tmp; j++)
+ {
+ int* tmp2 = bmGs + j;
+ if (*tmp2 == pattern_len)
+ *tmp2 = tmp;
+ }
+ }
+ }
+
+ int* tmp2;
+ for (tmp = plm1 - i; j < tmp; j++)
+ {
+ tmp2 = bmGs + j;
+ if (*tmp2 == pattern_len)
+ *tmp2 = tmp;
+ }
+
+ tmp2 = bmGs + plm1;
+ for (i = 0; i <= pattern_len - 2; i++)
+ *(tmp2 - suff[i]) = plm1 - i;
+}
+
+
+/**********************************************************************
+ turboBM_compute_bad_character_shifts()
+ Precomputation dependent on pattern_len.
+**********************************************************************/
+
+void Item_func_like::turboBM_compute_bad_character_shifts()
+{
+ int* i;
+ int* end = bmBc + alphabet_size;
+ for (i = bmBc; i < end; i++)
+ *i = pattern_len;
+
+ int j;
+ const int plm1 = pattern_len - 1;
+ if (binary)
+ for (j = 0; j < plm1; j++)
+ bmBc[pattern[j]] = plm1 - j;
+ else
+ for (j = 0; j < plm1; j++)
+ bmBc[likeconv(pattern[j])] = plm1 - j;
+}
+
+
+/**********************************************************************
+ turboBM_matches()
+ Search for pattern in text, returns true/false for match/no match
+**********************************************************************/
+
+bool Item_func_like::turboBM_matches(const char* text, int text_len) const
+{
+ register int bcShift;
+ register int turboShift;
+ int shift = pattern_len;
+ int j = 0;
+ int u = 0;
+
+ const int plm1 = pattern_len - 1;
+ const int tlmpl = text_len - pattern_len;
+
+ /* Searching */
+ if (binary)
+ {
+ while (j <= tlmpl)
+ {
+ register int i = plm1;
+ while (i >= 0 && pattern[i] == text[i + j])
+ {
+ i--;
+ if (i == plm1 - shift)
+ i -= u;
+ }
+ if (i < 0)
+ return true;
+
+ register const int v = plm1 - i;
+ turboShift = u - v;
+ bcShift = bmBc[text[i + j]] - plm1 + i;
+ shift = max(turboShift, bcShift);
+ shift = max(shift, bmGs[i]);
+ if (shift == bmGs[i])
+ u = min(pattern_len - shift, v);
+ else
+ {
+ if (turboShift < bcShift)
+ shift = max(shift, u + 1);
+ u = 0;
+ }
+ j += shift;
+ }
+ return false;
+ }
+ else
+ {
+ while (j <= tlmpl)
+ {
+ register int i = plm1;
+ while (i >= 0 && likeconv(pattern[i]) == likeconv(text[i + j]))
+ {
+ i--;
+ if (i == plm1 - shift)
+ i -= u;
+ }
+ if (i < 0)
+ return true;
+
+ register const int v = plm1 - i;
+ turboShift = u - v;
+ bcShift = bmBc[likeconv(text[i + j])] - plm1 + i;
+ shift = max(turboShift, bcShift);
+ shift = max(shift, bmGs[i]);
+ if (shift == bmGs[i])
+ u = min(pattern_len - shift, v);
+ else
+ {
+ if (turboShift < bcShift)
+ shift = max(shift, u + 1);
+ u = 0;
+ }
+ j += shift;
+ }
+ return false;
+ }
+}
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 2048c4baea8..cd6e3d6e414 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -478,15 +478,40 @@ public:
class Item_func_like :public Item_bool_func2
{
char escape;
-public:
- Item_func_like(Item *a,Item *b, char* escape_arg) :Item_bool_func2(a,b),escape(*escape_arg)
+
+ // Turbo Boyer-Moore data
+ bool canDoTurboBM; // pattern is '%abcd%' case
+ const char* pattern;
+ int pattern_len;
+
+ // TurboBM buffers, *this is owner
+ int* bmGs; // good suffix shift table, size is pattern_len + 1
+ int* bmBc; // bad character shift table, size is alphabet_size
+
+ void turboBM_compute_suffixes(int* suff);
+ void turboBM_compute_good_suffix_shifts(int* suff);
+ void turboBM_compute_bad_character_shifts();
+ bool turboBM_matches(const char* text, int text_len) const;
+ enum { alphabet_size = 256 };
+
+public:
+ Item_func_like::Item_func_like(Item *a,Item *b, char* escape_arg) :
+ Item_bool_func2(a,b),
+ escape(*escape_arg),
+ canDoTurboBM(false),
+ pattern(0),
+ pattern_len(0),
+ bmGs(0),
+ bmBc(0)
{}
+
longlong val_int();
enum Functype functype() const { return LIKE_FUNC; }
optimize_type select_optimize() const;
cond_result eq_cmp_result() const { return COND_TRUE; }
const char *func_name() const { return "like"; }
void fix_length_and_dec();
+ bool fix_fields(THD *thd,struct st_table_list *tlist);
};
#ifdef USE_REGEX
diff --git a/sql/unireg.h b/sql/unireg.h
index c4d2052d1da..5a61f4a6c12 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -122,6 +122,13 @@ bfill((A)->null_flags,(A)->null_bytes,255);\
#define TE_INFO_LENGTH 3
#define MTYP_NOEMPTY_BIT 128
+/*
+ * Minimum length pattern before Turbo Boyer-Moore is used
+ * for SELECT "text" LIKE "%pattern%", excluding the two
+ * wildcards in class Item_func_like.
+ */
+#define MIN_TURBOBM_PATTERN_LEN 3
+
/* Include prototypes for unireg */
#include "mysqld_error.h"