summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorserg@serg.mylan <>2003-10-02 20:22:29 +0200
committerserg@serg.mylan <>2003-10-02 20:22:29 +0200
commit9a057fd0bd392b2e58d68997c93ff6151808d393 (patch)
tree4ccd6f1ab67583e81c02d66fd35d83a3d9bee0fe
parent630eb4aec7cfbf3002a9d39ee42f56e2b8c915d2 (diff)
downloadmariadb-git-9a057fd0bd392b2e58d68997c93ff6151808d393.tar.gz
ft1->ft2 auto-conversion on INSERT (WL#725)
-rw-r--r--myisam/ft_update.c59
-rw-r--r--myisam/fulltext.h2
-rw-r--r--myisam/mi_delete.c18
-rw-r--r--myisam/mi_open.c7
-rw-r--r--myisam/mi_write.c85
-rw-r--r--myisam/myisamdef.h6
-rw-r--r--mysql-test/r/fulltext2.result104
-rw-r--r--mysql-test/t/fulltext2.test80
8 files changed, 331 insertions, 30 deletions
diff --git a/myisam/ft_update.c b/myisam/ft_update.c
index cdf3b306087..8423b6898cd 100644
--- a/myisam/ft_update.c
+++ b/myisam/ft_update.c
@@ -21,13 +21,6 @@
#include "ftdefs.h"
#include <math.h>
-/**************************************************************
- This is to make ft-code to ignore keyseg.length at all *
- and to index the whole VARCHAR/BLOB instead... */
-#undef set_if_smaller
-#define set_if_smaller(A,B) /* no op */
-/**************************************************************/
-
void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record,
FT_SEG_ITERATOR *ftsi)
{
@@ -88,7 +81,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
{
ftsi->len=uint2korr(ftsi->pos);
ftsi->pos+=2; /* Skip VARCHAR length */
- set_if_smaller(ftsi->len,ftsi->seg->length);
DBUG_RETURN(1);
}
if (ftsi->seg->flag & HA_BLOB_PART)
@@ -96,7 +88,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start,
sizeof(char*));
- set_if_smaller(ftsi->len,ftsi->seg->length);
DBUG_RETURN(1);
}
ftsi->len=ftsi->seg->length;
@@ -305,3 +296,53 @@ uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr,
memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len);
DBUG_RETURN(_mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos));
}
+
+/*
+ convert key value to ft2
+*/
+uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key)
+{
+ my_off_t root;
+ DYNAMIC_ARRAY *da=info->ft1_to_ft2;
+ MI_KEYDEF *keyinfo=&info->s->ft2_keyinfo;
+ uchar *key_ptr=dynamic_array_ptr(da, 0), *end;
+ uint length, key_length;
+ DBUG_ENTER("_mi_ft_convert_to_ft2");
+
+ /* we'll generate one pageful at once, and insert the rest one-by-one */
+ /* calculating the length of this page ...*/
+ length=(keyinfo->block_length-2) / keyinfo->keylength;
+ set_if_smaller(length, da->elements);
+ length=length * keyinfo->keylength;
+
+ get_key_full_length_rdonly(key_length, key);
+ while (_mi_ck_delete(info, keynr, key, key_length) == 0)
+ /* nothing to do here.
+ _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys
+ */;
+
+ /* creating pageful of keys */
+ mi_putint(info->buff,length+2,0);
+ memcpy(info->buff+2, key_ptr, length);
+ info->buff_used=info->page_changed=1; /* info->buff is used */
+ if ((root= _mi_new(info,keyinfo)) == HA_OFFSET_ERROR ||
+ _mi_write_keypage(info,keyinfo,root,info->buff))
+ DBUG_RETURN(-1);
+
+ /* inserting the rest of key values */
+ end=dynamic_array_ptr(da, da->elements);
+ for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength)
+ if(_mi_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME))
+ DBUG_RETURN(-1);
+
+ /* now, writing the word key entry */
+ ft_intXstore(key+key_length, -da->elements);
+ _mi_dpointer(info, key+key_length+HA_FT_WLEN, root);
+
+ DBUG_RETURN(_mi_ck_real_write_btree(info,
+ info->s->keyinfo+keynr,
+ key, 0,
+ &info->s->state.key_root[keynr],
+ SEARCH_SAME));
+}
+
diff --git a/myisam/fulltext.h b/myisam/fulltext.h
index ec267eb3e86..d8c74d4e94b 100644
--- a/myisam/fulltext.h
+++ b/myisam/fulltext.h
@@ -34,3 +34,5 @@ int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *);
int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t);
int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t);
+uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *);
+
diff --git a/myisam/mi_delete.c b/myisam/mi_delete.c
index 2ab5c5d0319..d8e1aef5eb6 100644
--- a/myisam/mi_delete.c
+++ b/myisam/mi_delete.c
@@ -18,6 +18,7 @@
#include "fulltext.h"
#include "rt_index.h"
+#include <assert.h>
#ifdef __WIN__
#include <errno.h>
@@ -231,13 +232,22 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_full_length_rdonly(off, lastkey);
subkeys=ft_sintXkorr(lastkey+off);
+ DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0);
comp_flag=SEARCH_SAME;
if (subkeys >= 0)
{
/* normal word, one-level tree structure */
- DBUG_PRINT("info",("FT1"));
- flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY,
- comp_flag, &keypos, lastkey, &last_key);
+ if (info->ft1_to_ft2)
+ {
+ /* we're in ft1->ft2 conversion mode. Saving key data */
+ insert_dynamic(info->ft1_to_ft2, lastkey+off);
+ }
+ else
+ {
+ /* we need exact match only if not in ft1->ft2 conversion mode */
+ flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY,
+ comp_flag, &keypos, lastkey, &last_key);
+ }
/* fall through to normal delete */
}
else
@@ -252,13 +262,11 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
if (subkeys == -1)
{
/* the last entry in sub-tree */
- DBUG_PRINT("info",("FT2: the last entry"));
_mi_dispose(info, keyinfo, root);
/* fall through to normal delete */
}
else
{
- DBUG_PRINT("info",("FT2: going down"));
keyinfo=&info->s->ft2_keyinfo;
kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */
key+=off;
diff --git a/myisam/mi_open.c b/myisam/mi_open.c
index c4b24acdb77..744bb9bb3b6 100644
--- a/myisam/mi_open.c
+++ b/myisam/mi_open.c
@@ -513,8 +513,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
NullS))
goto err;
errpos=6;
-
- if (!have_rtree)
+
+ if (!have_rtree)
info.rtree_recursion_state= NULL;
strmov(info.filename,org_name);
@@ -536,6 +536,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
info.lock_type=F_UNLCK;
info.quick_mode=0;
info.bulk_insert=0;
+ info.ft1_to_ft2=0;
info.errkey= -1;
info.page_changed=1;
pthread_mutex_lock(&share->intern_lock);
@@ -1112,7 +1113,7 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo)
/**************************************************************************
Open data file with or without RAID
-We can't use dup() here as the data file descriptors need to have different
+We can't use dup() here as the data file descriptors need to have different
active seek-positions.
The argument file_to_dup is here for the future if there would on some OS
diff --git a/myisam/mi_write.c b/myisam/mi_write.c
index 8e0b7e3530c..b6a7bf50dd0 100644
--- a/myisam/mi_write.c
+++ b/myisam/mi_write.c
@@ -18,6 +18,7 @@
#include "fulltext.h"
#include "rt_index.h"
+#include <assert.h>
#ifdef __WIN__
#include <errno.h>
@@ -124,7 +125,7 @@ int mi_write(MI_INFO *info, byte *record)
else
{
if (share->keyinfo[i].ck_insert(info,i,buff,
- _mi_make_key(info,i,buff,record,filepos)))
+ _mi_make_key(info,i,buff,record,filepos)))
{
if (local_lock_tree)
rw_unlock(&share->key_root_lock[i]);
@@ -264,13 +265,32 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key,
else
comp_flag=SEARCH_SAME; /* Keys in rec-pos order */
+ error=_mi_ck_real_write_btree(info, keyinfo, key, key_length,
+ root, comp_flag);
+ if (info->ft1_to_ft2)
+ {
+ if (!error)
+ error= _mi_ft_convert_to_ft2(info, keynr, key);
+ delete_dynamic(info->ft1_to_ft2);
+ my_free(info->ft1_to_ft2, MYF(0));
+ info->ft1_to_ft2=0;
+ }
+ DBUG_RETURN(error);
+} /* _mi_ck_write_btree */
+
+int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
+ uchar *key, uint key_length, my_off_t *root, uint comp_flag)
+{
+ int error;
+ DBUG_ENTER("_mi_ck_real_write_btree");
+ /* key_length parameter is used only if comp_flag is SEARCH_FIND */
if (*root == HA_OFFSET_ERROR ||
(error=w_search(info, keyinfo, comp_flag, key, key_length,
*root, (uchar *) 0, (uchar*) 0,
(my_off_t) 0, 1)) > 0)
error=_mi_enlarge_root(info,keyinfo,key,root);
DBUG_RETURN(error);
-} /* _mi_ck_write_btree */
+} /* _mi_ck_real_write_btree */
/* Make a new root with key as only pointer */
@@ -359,13 +379,11 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
keyinfo=&info->s->ft2_keyinfo;
key+=off;
keypos-=keyinfo->keylength; /* we'll modify key entry 'in vivo' */
- if ((error=w_search(info, keyinfo, comp_flag, key, HA_FT_WLEN, root,
- (uchar *) 0, (uchar*) 0, (my_off_t) 0, 1)) > 0)
- {
- error=_mi_enlarge_root(info, keyinfo, key, &root);
- _mi_dpointer(info, keypos+HA_FT_WLEN, root);
- }
+ error=_mi_ck_real_write_btree(info, keyinfo, key, 0,
+ &root, comp_flag);
+ _mi_dpointer(info, keypos+HA_FT_WLEN, root);
subkeys--; /* should there be underflow protection ? */
+ DBUG_ASSERT(subkeys < 0);
ft_intXstore(keypos, subkeys);
if (!error)
error=_mi_write_keypage(info,keyinfo,page,temp_buff);
@@ -410,7 +428,6 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff,
uchar *father_buff, uchar *father_key_pos, my_off_t father_page,
my_bool insert_last)
-
{
uint a_length,nod_flag;
int t_length;
@@ -464,8 +481,56 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
a_length+=t_length;
mi_putint(anc_buff,a_length,nod_flag);
if (a_length <= keyinfo->block_length)
+ {
+ if (keyinfo->block_length - a_length < 32 &&
+ keyinfo->flag & HA_FULLTEXT && key_pos == endpos &&
+ info->s->base.key_reflength <= info->s->base.rec_reflength &&
+ info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))
+ {
+ /*
+ Normal word. One-level tree. Page is almost full.
+ Let's consider converting.
+ We'll compare 'key' and the first key at anc_buff
+ */
+ uchar *a=key, *b=anc_buff+2+nod_flag;
+ uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength;
+ /* the very first key on the page is always unpacked */
+ DBUG_ASSERT((*b & 128) == 0);
+#if HA_FT_MAXLEN >= 127
+ blen= mi_uint2korr(b); b+=2;
+#else
+ blen= *b++;
+#endif
+ get_key_length(alen,a);
+ DBUG_ASSERT(info->ft1_to_ft2==0);
+ if (alen == blen &&
+ mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0)
+ {
+ /* yup. converting */
+ info->ft1_to_ft2=(DYNAMIC_ARRAY *)
+ my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME));
+ my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50);
+
+ /*
+ now, adding all keys from the page to dynarray
+ if the page is a leaf (if not keys will be deleted later)
+ */
+ if (!nod_flag)
+ {
+ /* let's leave the first key on the page, though, because
+ we cannot easily dispatch an empty page here */
+ b+=blen+ft2len+2;
+ for (a=anc_buff+a_length ; b < a ; b+=ft2len+2)
+ insert_dynamic(info->ft1_to_ft2, b);
+
+ /* fixing the page's length - it contains only one key now */
+ mi_putint(anc_buff,2+blen+ft2len+2,0);
+ }
+ /* the rest will be done when we're back from recursion */
+ }
+ }
DBUG_RETURN(0); /* There is room on page */
-
+ }
/* Page is full */
if (nod_flag)
insert_last=0;
diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h
index 83e28a42797..6ec7908c014 100644
--- a/myisam/myisamdef.h
+++ b/myisam/myisamdef.h
@@ -222,7 +222,8 @@ struct st_myisam_info {
MI_BLOB *blobs; /* Pointer to blobs */
MI_BIT_BUFF bit_buff;
/* accumulate indexfile changes between write's */
- TREE *bulk_insert;
+ TREE *bulk_insert;
+ DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
char *filename; /* parameter to open filename */
uchar *buff, /* Temp area for key */
*lastkey,*lastkey2; /* Last used search key */
@@ -464,6 +465,9 @@ extern int _mi_delete_static_record(MI_INFO *info);
extern int _mi_cmp_static_record(MI_INFO *info,const byte *record);
extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool);
extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length);
+extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ my_off_t *root, uint comp_flag);
extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root);
extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
uchar *anc_buff,uchar *key_pos,uchar *key_buff,
diff --git a/mysql-test/r/fulltext2.result b/mysql-test/r/fulltext2.result
index a35be210ece..687bc6edfa0 100644
--- a/mysql-test/r/fulltext2.result
+++ b/mysql-test/r/fulltext2.result
@@ -103,4 +103,106 @@ count(*)
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
262
-DROP TABLE IF EXISTS t1;
+drop table t1;
+CREATE TABLE t1 (
+i int(10) unsigned not null auto_increment primary key,
+a varchar(255) not null,
+FULLTEXT KEY (a)
+) TYPE=MyISAM;
+select count(*) from t1 where match a against ('aaaxxx');
+count(*)
+260
+select count(*) from t1 where match a against ('aaayyy');
+count(*)
+250
+select count(*) from t1 where match a against ('aaazzz');
+count(*)
+255
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+count(*)
+260
+select count(*) from t1 where match a against ('aaayyy' in boolean mode);
+count(*)
+250
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+count(*)
+255
+select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
+count(*)
+765
+select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
+count(*)
+765
+select count(*) from t1 where match a against ('aaax*' in boolean mode);
+count(*)
+260
+select count(*) from t1 where match a against ('aaay*' in boolean mode);
+count(*)
+250
+select count(*) from t1 where match a against ('aaa*' in boolean mode);
+count(*)
+765
+insert t1 (a) values ('aaaxxx'),('aaayyy');
+insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
+select count(*) from t1 where match a against ('aaaxxx');
+count(*)
+261
+select count(*) from t1 where match a against ('aaayyy');
+count(*)
+251
+select count(*) from t1 where match a against ('aaazzz');
+count(*)
+260
+insert t1 (a) values ('aaaxxx 000000');
+select count(*) from t1 where match a against ('000000');
+count(*)
+1
+delete from t1 where match a against ('000000');
+select count(*) from t1 where match a against ('000000');
+count(*)
+0
+select count(*) from t1 where match a against ('aaaxxx');
+count(*)
+261
+delete from t1 where match a against ('aaazzz');
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+count(*)
+261
+select count(*) from t1 where match a against ('aaayyy' in boolean mode);
+count(*)
+251
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+count(*)
+0
+select count(*) from t1 where a = 'aaaxxx';
+count(*)
+261
+select count(*) from t1 where a = 'aaayyy';
+count(*)
+251
+select count(*) from t1 where a = 'aaazzz';
+count(*)
+0
+insert t1 (a) values ('aaaxxx 000000');
+select count(*) from t1 where match a against ('000000');
+count(*)
+1
+update t1 set a='aaazzz' where match a against ('000000');
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+count(*)
+261
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+count(*)
+1
+update t1 set a='aaazzz' where a = 'aaaxxx';
+update t1 set a='aaaxxx' where a = 'aaayyy';
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+count(*)
+251
+select count(*) from t1 where match a against ('aaayyy' in boolean mode);
+count(*)
+0
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+count(*)
+262
+drop table t1;
diff --git a/mysql-test/t/fulltext2.test b/mysql-test/t/fulltext2.test
index b739d60e3b3..a01ec3a59c9 100644
--- a/mysql-test/t/fulltext2.test
+++ b/mysql-test/t/fulltext2.test
@@ -94,5 +94,83 @@ select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
-DROP TABLE IF EXISTS t1;
+drop table t1;
+
+CREATE TABLE t1 (
+ i int(10) unsigned not null auto_increment primary key,
+ a varchar(255) not null,
+ FULLTEXT KEY (a)
+) TYPE=MyISAM;
+
+# two-level entry, second-level tree with depth 2
+--disable_query_log
+let $1=260;
+while ($1)
+{
+ eval insert t1 (a) values ('aaaxxx');
+ dec $1;
+}
+let $1=255;
+while ($1)
+{
+ eval insert t1 (a) values ('aaazzz');
+ dec $1;
+}
+let $1=250;
+while ($1)
+{
+ eval insert t1 (a) values ('aaayyy');
+ dec $1;
+}
+--enable_query_log
+
+select count(*) from t1 where match a against ('aaaxxx');
+select count(*) from t1 where match a against ('aaayyy');
+select count(*) from t1 where match a against ('aaazzz');
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+select count(*) from t1 where match a against ('aaayyy' in boolean mode);
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
+select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
+
+select count(*) from t1 where match a against ('aaax*' in boolean mode);
+select count(*) from t1 where match a against ('aaay*' in boolean mode);
+select count(*) from t1 where match a against ('aaa*' in boolean mode);
+
+# mi_write:
+
+insert t1 (a) values ('aaaxxx'),('aaayyy');
+insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
+select count(*) from t1 where match a against ('aaaxxx');
+select count(*) from t1 where match a against ('aaayyy');
+select count(*) from t1 where match a against ('aaazzz');
+
+# mi_delete
+insert t1 (a) values ('aaaxxx 000000');
+select count(*) from t1 where match a against ('000000');
+delete from t1 where match a against ('000000');
+select count(*) from t1 where match a against ('000000');
+select count(*) from t1 where match a against ('aaaxxx');
+delete from t1 where match a against ('aaazzz');
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+select count(*) from t1 where match a against ('aaayyy' in boolean mode);
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+# double-check without index
+select count(*) from t1 where a = 'aaaxxx';
+select count(*) from t1 where a = 'aaayyy';
+select count(*) from t1 where a = 'aaazzz';
+
+# update
+insert t1 (a) values ('aaaxxx 000000');
+select count(*) from t1 where match a against ('000000');
+update t1 set a='aaazzz' where match a against ('000000');
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+update t1 set a='aaazzz' where a = 'aaaxxx';
+update t1 set a='aaaxxx' where a = 'aaayyy';
+select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
+select count(*) from t1 where match a against ('aaayyy' in boolean mode);
+select count(*) from t1 where match a against ('aaazzz' in boolean mode);
+
+drop table t1;