diff options
35 files changed, 1096 insertions, 463 deletions
diff --git a/heap/hp_hash.c b/heap/hp_hash.c index d30cbc9b82f..d040f37aea0 100644 --- a/heap/hp_hash.c +++ b/heap/hp_hash.c @@ -20,6 +20,38 @@ #include <m_ctype.h> #include <assert.h> + + +/* + Find out how many rows there are in the given range + + SYNOPSIS + hp_rb_records_in_range() + info HEAP handler + inx Index to use + start_key Start of range. Null pointer if from first key + start_key_len Length of start key + start_search_flag Flag if start key should be included or not + end_key End of range. Null pointer if to last key + end_key_len Length of end key + end_search_flag Flag if end key should be included or not + + NOTES + start_search_flag can have one of the following values: + HA_READ_KEY_EXACT Include the key in the range + HA_READ_AFTER_KEY Don't include key in range + + end_search_flag can have one of the following values: + HA_READ_BEFORE_KEY Don't include key in range + HA_READ_AFTER_KEY Include all 'end_key' values in the range + + RETURN + HA_POS_ERROR Something is wrong with the index tree. + 0 There are no matching keys in the given range + number > 0 There are approximately 'number' matching rows in + the range. +*/ + ha_rows hp_rb_records_in_range(HP_INFO *info, int inx, const byte *start_key, uint start_key_len, enum ha_rkey_function start_search_flag, @@ -30,6 +62,7 @@ ha_rows hp_rb_records_in_range(HP_INFO *info, int inx, const byte *start_key, HP_KEYDEF *keyinfo= info->s->keydef + inx; TREE *rb_tree = &keyinfo->rb_tree; heap_rb_param custom_arg; + DBUG_ENTER("hp_rb_records_in_range"); info->lastinx= inx; custom_arg.keyseg= keyinfo->seg; @@ -59,10 +92,12 @@ ha_rows hp_rb_records_in_range(HP_INFO *info, int inx, const byte *start_key, end_pos= rb_tree->elements_in_tree + (ha_rows)1; } + DBUG_PRINT("info",("start_pos: %lu end_pos: %lu", (ulong) start_pos, + (ulong) end_pos)); if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR) - return HA_POS_ERROR; - return end_pos < start_pos ? (ha_rows) 0 : - (end_pos == start_pos ? 
(ha_rows) 1 : end_pos - start_pos); + DBUG_RETURN(HA_POS_ERROR); + DBUG_RETURN(end_pos < start_pos ? (ha_rows) 0 : + (end_pos == start_pos ? (ha_rows) 1 : end_pos - start_pos)); } /* Search after a record based on a key */ diff --git a/include/my_handler.h b/include/my_handler.h index 618d1df1a6e..18a6234d3f6 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -58,7 +58,7 @@ typedef struct st_HA_KEYSEG /* Key-portion */ } extern int mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , - my_bool); + my_bool, my_bool); extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, register uchar *b, uint key_length, uint nextflag, uint *diff_pos); diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index bff1232750e..1d4bfee86a4 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -124,7 +124,7 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, - (uchar*) (*a)->word+1,(*a)->len-1,0); + (uchar*) (*a)->word+1,(*a)->len-1,0,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); return i; @@ -228,7 +228,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC), (uchar*) ftbw->word + (ftbw->flags & FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC), - 0); + 0,0); } if (r) /* not found */ @@ -633,7 +633,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ftbw=ftb->list[c]; if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, (uchar*) ftbw->word+1, ftbw->len-1, - (my_bool) (ftbw->flags&FTB_FLAG_TRUNC)) >0) + (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0) b=c; else a=c; @@ -643,7 +643,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ftbw=ftb->list[c]; if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len, (uchar*) ftbw->word+1,ftbw->len-1, - 
(my_bool) (ftbw->flags&FTB_FLAG_TRUNC))) + (my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0)) break; if (ftbw->docid[1] == docid) continue; diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c index 45d13f56c6e..03875abe7b0 100644 --- a/myisam/ft_nlq_search.c +++ b/myisam/ft_nlq_search.c @@ -96,7 +96,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) if (keylen && mi_compare_text(aio->charset,info->lastkey+1, - info->lastkey_length-extra-1, keybuff+1,keylen-1,0)) + info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0)) break; subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra); diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 250e92566b7..b0fe180d0fb 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -27,7 +27,7 @@ typedef struct st_ft_docstat { static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { return mi_compare_text(cs, (uchar*) w1->pos, w1->len, - (uchar*) w2->pos, w2->len, 0); + (uchar*) w2->pos, w2->len, 0, 0); } static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) diff --git a/myisam/ft_stopwords.c b/myisam/ft_stopwords.c index 6682de18c65..112af87d201 100644 --- a/myisam/ft_stopwords.c +++ b/myisam/ft_stopwords.c @@ -32,7 +32,7 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), { return mi_compare_text(default_charset_info, (uchar *)w1->pos,w1->len, - (uchar *)w2->pos,w2->len,0); + (uchar *)w2->pos,w2->len,0,0); } static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action, diff --git a/myisam/ft_update.c b/myisam/ft_update.c index 4015abbbeba..beccc062270 100644 --- a/myisam/ft_update.c +++ b/myisam/ft_update.c @@ -179,7 +179,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, - (uchar*) ftsi2.pos,ftsi2.len,0))) + (uchar*) ftsi2.pos,ftsi2.len,0,0))) DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); } 
DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL); @@ -207,7 +207,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, while(old_word->pos && new_word->pos) { cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, - (uchar*) new_word->pos,new_word->len,0); + (uchar*) new_word->pos,new_word->len,0,0); cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); if (cmp < 0 || cmp2) diff --git a/myisam/mi_check.c b/myisam/mi_check.c index cdaed59cd01..9be191f349c 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -3313,7 +3313,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) if (val_off == a_len && mi_compare_text(sort_param->seg->charset, ((uchar *)a)+1,a_len-1, - ft_buf->lastkey+1,val_off-1, 0)==0) + ft_buf->lastkey+1,val_off-1, 0, 0)==0) { if (!ft_buf->buf) /* store in second-level tree */ { diff --git a/myisam/mi_search.c b/myisam/mi_search.c index 2871633102d..73a4d229fd6 100644 --- a/myisam/mi_search.c +++ b/myisam/mi_search.c @@ -273,7 +273,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, uchar *sort_order=keyinfo->seg->charset->sort_order; uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; uchar *saved_from, *saved_to, *saved_vseg; - uint saved_length=0, saved_prefix_len=0; + uint saved_length=0, saved_prefix_len=0; + uint length_pack; DBUG_ENTER("_mi_prefix_search"); LINT_INIT(length); @@ -289,26 +290,24 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, page+=2+nod_flag; *ret_pos=page; kseg=key; - { - uint lenght_pack; - get_key_pack_length(kseg_len,lenght_pack,kseg); - key_len_skip=lenght_pack+kseg_len; - key_len_left=(int) key_len- (int) key_len_skip; - cmplen=(key_len_left>=0) ? 
kseg_len : key_len-lenght_pack; - DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg)); - } -/* - Keys are compressed the following way: + get_key_pack_length(kseg_len,length_pack,kseg); + key_len_skip=length_pack+kseg_len; + key_len_left=(int) key_len- (int) key_len_skip; + cmplen=(key_len_left>=0) ? kseg_len : key_len-length_pack; + DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg)); - If the max length of first key segment <= 127 characters the prefix is - 1 byte else it's 2 byte + /* + Keys are compressed the following way: - prefix The high bit is set if this is a prefix for the prev key - length Packed length if the previous was a prefix byte - [length] Length character of data - next-key-seg Next key segments -*/ + If the max length of first key segment <= 127 characters the prefix is + 1 byte else it's 2 byte + + prefix The high bit is set if this is a prefix for the prev key + length Packed length if the previous was a prefix byte + [length] Length character of data + next-key-seg Next key segments + */ matched=0; /* how many char's from prefix were alredy matched */ len=0; /* length of previous key unpacked */ @@ -350,7 +349,8 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, saved_vseg=vseg; saved_prefix_len=prefix_len; - DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack,suffix_len,vseg)); + DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack, + suffix_len,vseg)); { uchar *from=vseg+suffix_len; HA_KEYSEG *keyseg; @@ -396,14 +396,15 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, matched=prefix_len+left; - for(my_flag=0;left;left--) + for (my_flag=0;left;left--) if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) break; if (my_flag>0) /* mismatch */ break; - else if (my_flag==0) /* match */ - { /* + if (my_flag==0) /* match */ + { + /* ** len cmplen seg_left_len more_segs ** < matched=len; continue search ** > = prefix ? 
found : (matched=len; continue search) @@ -414,30 +415,68 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, */ if (len < cmplen) { - my_flag= -1; + if ((keyinfo->seg->type != HA_KEYTYPE_TEXT && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT)) + my_flag= -1; + else + { + /* We have to compare k and vseg as if they were space extended */ + uchar *end= k+ (cmplen - len); + for ( ; k < end && *k == ' '; k++) ; + if (k == end) + goto cmp_rest; /* should never happen */ + if (*k < (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } } else if (len > cmplen) { - if ((my_flag= (!(nextflag & SEARCH_PREFIX) || key_len_left>0))) - break; - goto fix_flag; - } - else if (key_len_left>0) - { - uint not_used; - if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, - k,key_len_left,nextflag,&not_used)) >= 0) - break; + uchar *end; + if ((nextflag & SEARCH_PREFIX) && key_len_left == 0) + goto fix_flag; + + /* We have to compare k and vseg as if they were space extended */ + for (end=vseg + (len-cmplen) ; + vseg < end && *vseg == (uchar) ' '; + vseg++) ; + if (vseg == end) + goto cmp_rest; /* should never happen */ + + if (*vseg > (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ } else - { - /* at this line flag==-1 if the following lines were already - visited and 0 otherwise, i.e. flag <=0 here always !!! */ - fix_flag: - if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) - flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; - if (flag>=0) break; - } + { + cmp_rest: + if (key_len_left>0) + { + uint not_used; + if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, + k,key_len_left,nextflag,&not_used)) >= 0) + break; + } + else + { + /* + at this line flag==-1 if the following lines were already + visited and 0 otherwise, i.e. flag <=0 here always !!! 
+ */ + fix_flag: + DBUG_ASSERT(flag <= 0); + if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) + flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; + if (flag>=0) + break; + } + } } matched-=left; } @@ -1567,7 +1606,7 @@ _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,uchar *next_key, n_length-=tmp_length; length-=tmp_length+next_length_pack; /* We gained these chars */ } - if (n_length == 0) + if (n_length == 0 && ref_length == new_key_length) { s_temp->n_ref_length=pack_marker; /* Same as prev key */ } diff --git a/myisam/mi_unique.c b/myisam/mi_unique.c index f4ee39e55ca..38b4ed93311 100644 --- a/myisam/mi_unique.c +++ b/myisam/mi_unique.c @@ -180,7 +180,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT) { if (mi_compare_text(keyseg->charset, (uchar *) pos_a, length, - (uchar *) pos_b, length, 0)) + (uchar *) pos_b, length, 0, 0)) return 1; } else diff --git a/myisam/mi_write.c b/myisam/mi_write.c index 88e7f070642..382fb1156cb 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -500,7 +500,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, get_key_length(alen,a); DBUG_ASSERT(info->ft1_to_ft2==0); if (alen == blen && - mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0) + mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) { /* yup. 
converting */ info->ft1_to_ft2=(DYNAMIC_ARRAY *) diff --git a/myisam/myisam_ftdump.c b/myisam/myisam_ftdump.c index 7d1b20eb854..8ab6a7600b2 100644 --- a/myisam/myisam_ftdump.c +++ b/myisam/myisam_ftdump.c @@ -68,7 +68,7 @@ int main(int argc,char *argv[]) struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ MY_INIT(argv[0]); - if (error=handle_options(&argc, &argv, my_long_options, get_one_option)) + if ((error=handle_options(&argc, &argv, my_long_options, get_one_option))) exit(error); if (count || dump) verbose=0; diff --git a/mysql-test/r/ctype_collate.result b/mysql-test/r/ctype_collate.result index add730fe68f..c837d676b6e 100644 --- a/mysql-test/r/ctype_collate.result +++ b/mysql-test/r/ctype_collate.result @@ -337,15 +337,12 @@ SELECT latin1_f,count(*) FROM t1 GROUP BY latin1_f COLLATE latin1_german2_ci; latin1_f count(*) A 4 AD 2 -AE 2 -Ä 2 +AE 4 AF 2 B 2 -SS 2 -ß 1 +SS 3 U 2 -UE 2 -Ü 2 +UE 4 Y 2 Z 2 SELECT latin1_f,count(*) FROM t1 GROUP BY latin1_f COLLATE latin1_general_ci; @@ -431,13 +428,10 @@ A AD AE AF -Ä B U UE -Ü SS -ß Y Z SELECT DISTINCT latin1_f COLLATE latin1_general_ci FROM t1; diff --git a/mysql-test/r/endspace.result b/mysql-test/r/endspace.result new file mode 100644 index 00000000000..d2519523f36 --- /dev/null +++ b/mysql-test/r/endspace.result @@ -0,0 +1,196 @@ +drop table if exists t1; +select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; +'a' = 'a' 'a' = 'a ' 'a ' = 'a' +1 1 1 +select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a'; +'a\0' = 'a' 'a\0' < 'a' 'a\0' > 'a' +0 1 0 +select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0'; +'a' = 'a\0' 'a' < 'a\0' 'a' > 'a\0' +0 0 1 +select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a '; +'a\0' = 'a ' 'a\0' < 'a ' 'a\0' > 'a ' +0 1 0 +select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0'; +'a ' = 'a\0' 'a ' < 'a\0' 'a ' > 'a\0' +0 0 1 +select 'a a' > 'a', 'a \0' < 'a'; +'a a' > 'a' 'a \0' < 'a' +1 1 +select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; +binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' 
+1 1 1 +create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)); +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 > 'teststring\t'; +text1 +teststring +select * from t1 order by text1; +text1 +nothing +teststring +teststring +explain select * from t1 order by text1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL key1 32 NULL 3 Using index +alter table t1 modify text1 char(32) binary not null; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 > 'teststring\t'; +text1 +teststring +select text1, length(text1) from t1 order by text1; +text1 length(text1) +nothing 7 +teststring 11 +teststring 10 +select text1, length(text1) from t1 order by binary text1; +text1 length(text1) +nothing 7 +teststring 10 +teststring 11 +alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20)); +insert into t1 values ('teststring '); +select concat('|', text1, '|') from t1 order by text1; +concat('|', text1, '|') +|nothing| +|teststring| +|teststring | +|teststring | +alter table t1 modify text1 text not null, pack_keys=1; +select * from t1 where text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +teststring +select * from t1 where text1='teststring' 
or text1 > 'teststring\t'; +text1 +teststring +teststring +select concat('|', text1, '|') from t1 order by text1; +concat('|', text1, '|') +|nothing| +|teststring | +|teststring| +|teststring | +drop table t1; +create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0; +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 >= 'teststring\t'; +text1 +teststring +teststring +drop table t1; +create table t1 (text1 varchar(32) not NULL, KEY key1 using BTREE (text1)) engine=heap; +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 >= 'teststring\t'; +text1 +teststring +teststring +select * from t1 order by text1; +text1 +nothing +teststring +teststring +explain select * from t1 order by text1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL key1 32 NULL 3 +alter table t1 modify text1 char(32) binary not null; +select * from t1 order by text1; +text1 +nothing +teststring +teststring +drop table t1; +create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) engine=innodb; +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +text1 +teststring +teststring +select * from t1 where text1='teststring' or text1 > 'teststring\t'; +text1 +teststring +select * from t1 order by text1; +text1 +nothing +teststring +teststring +explain select * from t1 order by text1; +id select_type table type 
possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL key1 32 NULL 4 Using index +alter table t1 modify text1 char(32) binary not null; +select * from t1 order by text1; +text1 +nothing +teststring +teststring +alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20)); +insert into t1 values ('teststring '); +select concat('|', text1, '|') from t1 order by text1; +concat('|', text1, '|') +|nothing| +|teststring| +|teststring | +|teststring | +alter table t1 modify text1 text not null, pack_keys=1; +select * from t1 where text1 like 'teststring_%'; +text1 +teststring +teststring +select text1, length(text1) from t1 where text1='teststring' or text1 like 'teststring_%'; +text1 length(text1) +teststring 10 +teststring 11 +teststring 11 +select text1, length(text1) from t1 where text1='teststring' or text1 >= 'teststring\t'; +text1 length(text1) +teststring 10 +teststring 11 +teststring 11 +select concat('|', text1, '|') from t1 order by text1; +concat('|', text1, '|') +|nothing| +|teststring | +|teststring| +|teststring | +drop table t1; diff --git a/mysql-test/r/heap_btree.result b/mysql-test/r/heap_btree.result index ef63b1d4e00..f92a8653a69 100644 --- a/mysql-test/r/heap_btree.result +++ b/mysql-test/r/heap_btree.result @@ -173,9 +173,22 @@ f1 f2 drop table t1; create table t1 (btn char(10) not null, key using BTREE (btn)) engine=heap; insert into t1 values ("hello"),("hello"),("hello"),("hello"),("hello"),("a"),("b"),("c"),("d"),("e"),("f"),("g"),("h"),("i"); -explain select * from t1 where btn like "q%"; +explain select * from t1 where btn like "i%"; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ALL btn NULL NULL NULL 14 Using where +1 SIMPLE t1 range btn btn 10 NULL 1 Using where +explain select * from t1 where btn like "h%"; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range btn btn 10 NULL 4 Using where +explain select * from t1 where btn like "a%"; +id 
select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range btn btn 10 NULL 1 Using where +explain select * from t1 where btn like "b%"; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range btn btn 10 NULL 1 Using where +select * from t1 where btn like "ff%"; +btn +select * from t1 where btn like " %"; +btn select * from t1 where btn like "q%"; btn alter table t1 add column new_col char(1) not null, add key using BTREE (btn,new_col), drop key btn; diff --git a/mysql-test/t/endspace.test b/mysql-test/t/endspace.test new file mode 100644 index 00000000000..a9933ff93b5 --- /dev/null +++ b/mysql-test/t/endspace.test @@ -0,0 +1,96 @@ +# +# Test problem with characters < ' ' at end of strings (Bug #3152) +# + +-- source include/have_innodb.inc +--disable_warnings +drop table if exists t1; +--enable_warnings + +select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; +select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a'; +select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0'; +select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a '; +select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0'; +select 'a a' > 'a', 'a \0' < 'a'; +select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; + +# +# Test MyISAM tables. 
+# + +create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)); +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +check table t1; +select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 > 'teststring\t'; +select * from t1 order by text1; +explain select * from t1 order by text1; + +alter table t1 modify text1 char(32) binary not null; +check table t1; +select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 > 'teststring\t'; +select text1, length(text1) from t1 order by text1; +select text1, length(text1) from t1 order by binary text1; + +alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20)); +insert into t1 values ('teststring '); +select concat('|', text1, '|') from t1 order by text1; + +alter table t1 modify text1 text not null, pack_keys=1; +select * from t1 where text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 > 'teststring\t'; +select concat('|', text1, '|') from t1 order by text1; +drop table t1; + +create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0; +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 >= 'teststring\t'; +drop table t1; + +# Test HEAP tables (with BTREE keys) + +create table t1 (text1 varchar(32) not NULL, KEY key1 using BTREE (text1)) engine=heap; +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%'; 
+select * from t1 where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 >= 'teststring\t'; +select * from t1 order by text1; +explain select * from t1 order by text1; + +alter table t1 modify text1 char(32) binary not null; +select * from t1 order by text1; +drop table t1; + +# +# Test InnoDB tables +# + +create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) engine=innodb; +insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); +check table t1; +select * from t1 where text1='teststring' or text1 like 'teststring_%'; +select * from t1 where text1='teststring' or text1 > 'teststring\t'; +select * from t1 order by text1; +explain select * from t1 order by text1; + +alter table t1 modify text1 char(32) binary not null; +select * from t1 order by text1; + +alter table t1 modify text1 blob not null, drop key key1, add key key1 (text1(20)); +insert into t1 values ('teststring '); +select concat('|', text1, '|') from t1 order by text1; + +alter table t1 modify text1 text not null, pack_keys=1; +select * from t1 where text1 like 'teststring_%'; + +# The following gives wrong result in InnoDB +select text1, length(text1) from t1 where text1='teststring' or text1 like 'teststring_%'; +select text1, length(text1) from t1 where text1='teststring' or text1 >= 'teststring\t'; +select concat('|', text1, '|') from t1 order by text1; +drop table t1; diff --git a/mysql-test/t/heap_btree.test b/mysql-test/t/heap_btree.test index a520065a8b3..ad78636d002 100644 --- a/mysql-test/t/heap_btree.test +++ b/mysql-test/t/heap_btree.test @@ -110,7 +110,13 @@ drop table t1; create table t1 (btn char(10) not null, key using BTREE (btn)) engine=heap; insert into t1 values ("hello"),("hello"),("hello"),("hello"),("hello"),("a"),("b"),("c"),("d"),("e"),("f"),("g"),("h"),("i"); -explain select * from t1 where btn like "q%"; +explain select * from t1 where btn like "i%"; +explain select * from t1 where btn like "h%"; +explain 
select * from t1 where btn like "a%"; +explain select * from t1 where btn like "b%"; +# For the following the BTREE MAY notice that there is no possible matches +select * from t1 where btn like "ff%"; +select * from t1 where btn like " %"; select * from t1 where btn like "q%"; alter table t1 add column new_col char(1) not null, add key using BTREE (btn,new_col), drop key btn; update t1 set new_col=left(btn,1); diff --git a/mysys/charset.c b/mysys/charset.c index c422ead89c0..80f62b06a3e 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -459,7 +459,9 @@ static my_bool init_available_charsets(myf myflags) init_compiled_charsets(myflags); /* Copy compiled charsets */ - for (cs=all_charsets; cs < all_charsets+255 ; cs++) + for (cs=all_charsets; + cs < all_charsets+array_elements(all_charsets)-1 ; + cs++) { if (*cs) { @@ -486,10 +488,11 @@ void free_charsets(void) uint get_collation_number(const char *name) { CHARSET_INFO **cs; - if (init_available_charsets(MYF(0))) /* If it isn't initialized */ - return 0; + init_available_charsets(MYF(0)); - for (cs= all_charsets; cs < all_charsets+255; ++cs) + for (cs= all_charsets; + cs < all_charsets+array_elements(all_charsets)-1 ; + cs++) { if ( cs[0] && cs[0]->name && !my_strcasecmp(&my_charset_latin1, cs[0]->name, name)) @@ -498,13 +501,15 @@ uint get_collation_number(const char *name) return 0; /* this mimics find_type() */ } + uint get_charset_number(const char *charset_name, uint cs_flags) { CHARSET_INFO **cs; - if (init_available_charsets(MYF(0))) /* If it isn't initialized */ - return 0; + init_available_charsets(MYF(0)); - for (cs= all_charsets; cs < all_charsets+255; ++cs) + for (cs= all_charsets; + cs < all_charsets+array_elements(all_charsets)-1 ; + cs++) { if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && !my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name)) @@ -517,8 +522,7 @@ uint get_charset_number(const char *charset_name, uint cs_flags) const char *get_charset_name(uint charset_number) { 
CHARSET_INFO *cs; - if (init_available_charsets(MYF(0))) /* If it isn't initialized */ - return "?"; + init_available_charsets(MYF(0)); cs=all_charsets[charset_number]; if (cs && (cs->number == charset_number) && cs->name ) @@ -554,9 +558,12 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags) CHARSET_INFO *get_charset(uint cs_number, myf flags) { CHARSET_INFO *cs; + if (cs_number == default_charset_info->number) + return default_charset_info; + (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ - if (!cs_number) + if (!cs_number || cs_number >= array_elements(all_charsets)-1) return NULL; cs=get_internal_charset(cs_number, flags); diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 190c279aadf..35f620ccbcb 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -18,15 +18,21 @@ #include "my_handler.h" int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, - uchar *b, uint b_length, my_bool part_key) + uchar *b, uint b_length, my_bool part_key, + my_bool skip_end_space) { if (part_key && b_length < a_length) a_length=b_length; - return my_strnncoll(charset_info, a, a_length, b, b_length); + if (skip_end_space) + return charset_info->coll->strnncollsp(charset_info, a, a_length, + b, b_length); + return charset_info->coll->strnncoll(charset_info, a, a_length, + b, b_length); } + static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length, - my_bool part_key) + my_bool part_key, my_bool skip_end_space) { uint length= min(a_length,b_length); uchar *end= a+ length; @@ -37,6 +43,31 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length, return flag; if (part_key && b_length < a_length) return 0; + if (skip_end_space && a_length != b_length) + { + int swap= 0; + /* + We are using space compression. We have to check if longer key + has next character < ' ', in which case it's less than the shorter + key that has an implicite space afterwards. 
+ + This code is identical to the one in + strings/ctype-simple.c:my_strnncollsp_simple + */ + if (a_length < b_length) + { + /* put longer key in a */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + return 0; + } return (int) (a_length-b_length); } @@ -128,7 +159,8 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, if (piks && (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + next_key_length <= 0), + !(nextflag & SEARCH_PREFIX)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -137,17 +169,11 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, else { uint length=(uint) (end-a), a_length=length, b_length=length; - if (!(nextflag & SEARCH_PREFIX)) - { - while (a_length && a[a_length-1] == ' ') - a_length--; - while (b_length && b[b_length-1] == ' ') - b_length--; - } if (piks && (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + next_key_length <= 0), + !(nextflag & SEARCH_PREFIX)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a=end; b+=length; @@ -164,7 +190,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, if (piks && (flag=compare_bin(a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + next_key_length <= 0),1))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -176,7 +202,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, if (piks && (flag=compare_bin(a,length,b,length, (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + next_key_length <= 0),0))) return ((keyseg->flag & HA_REVERSE_SORT) ? 
-flag : flag); a+=length; b+=length; @@ -191,18 +217,13 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, full_b_length= b_length; next_key_length=key_length-b_length-pack_length; - if ((nextflag & (SEARCH_FIND | SEARCH_UPDATE)) == SEARCH_FIND) - { - while (a_length && a[a_length-1] == ' ') - a_length--; - while (b_length && b[b_length-1] == ' ') - b_length--; - } - if (piks && (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + next_key_length <= 0), + (my_bool) ((nextflag & (SEARCH_FIND | + SEARCH_UPDATE)) == + SEARCH_FIND)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+= full_a_length; b+= full_b_length; @@ -219,7 +240,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, if (piks && (flag=compare_bin(a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + next_key_length <= 0), 0))) return ((keyseg->flag & HA_REVERSE_SORT) ? 
-flag : flag); a+=a_length; b+=b_length; diff --git a/mysys/tree.c b/mysys/tree.c index 0b30ffa4971..063c8739e58 100644 --- a/mysys/tree.c +++ b/mysys/tree.c @@ -481,7 +481,6 @@ ha_rows tree_record_pos(TREE *tree, const void *key, TREE_ELEMENT *element= tree->root; double left= 1; double right= tree->elements_in_tree; - ha_rows last_equal_pos= HA_POS_ERROR; while (element != &tree->null_element) { @@ -490,9 +489,6 @@ ha_rows tree_record_pos(TREE *tree, const void *key, { switch (flag) { case HA_READ_KEY_EXACT: - last_equal_pos= (ha_rows) ((left + right) / 2); - cmp= 1; - break; case HA_READ_BEFORE_KEY: cmp= 1; break; @@ -516,7 +512,6 @@ ha_rows tree_record_pos(TREE *tree, const void *key, } switch (flag) { case HA_READ_KEY_EXACT: - return last_equal_pos; case HA_READ_BEFORE_KEY: return (ha_rows) right; case HA_READ_AFTER_KEY: diff --git a/sql/field.cc b/sql/field.cc index 00b7b9ebdb9..574800b6180 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -4209,10 +4209,10 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr) void Field_string::sort_string(char *to,uint length) { uint tmp=my_strnxfrm(field_charset, - (unsigned char *)to, length, - (unsigned char *) ptr, field_length); + (unsigned char *) to, length, + (unsigned char *) ptr, field_length); if (tmp < length) - bzero(to + tmp, length - tmp); + field_charset->cset->fill(field_charset, to + tmp, length - tmp, ' '); } @@ -4384,7 +4384,8 @@ void Field_varstring::sort_string(char *to,uint length) (unsigned char *) to, length, (unsigned char *)ptr+2, tot_length); if (tot_length < length) - bzero(to+tot_length,length-tot_length); + field_charset->cset->fill(field_charset, to+tot_length,length-tot_length, + binary() ? 
(char) 0 : ' '); } @@ -4838,7 +4839,9 @@ void Field_blob::sort_string(char *to,uint length) (unsigned char *)to, length, (unsigned char *)blob, blob_length); if (blob_length < length) - bzero(to+blob_length, length-blob_length); + field_charset->cset->fill(field_charset, to+blob_length, + length-blob_length, + binary() ? (char) 0 : ' '); } } diff --git a/sql/filesort.cc b/sql/filesort.cc index 064e92b7888..a15ffb43f6f 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -528,6 +528,8 @@ static void make_sortkey(register SORTPARAM *param, case STRING_RESULT: { CHARSET_INFO *cs=item->collation.collation; + char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' '); + if ((maybe_null=item->maybe_null)) *to++=1; /* All item->str() to use some extra byte for end null.. */ @@ -564,14 +566,16 @@ static void make_sortkey(register SORTPARAM *param, uint tmp_length=my_strnxfrm(cs,to,sort_field->length, (unsigned char *) from, length); if (tmp_length < sort_field->length) - bzero((char*) to+tmp_length,sort_field->length-tmp_length); + cs->cset->fill(cs, (char*) to+tmp_length, + sort_field->length-tmp_length, + fill_char); } else { my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length); - bzero((char *)to+length,diff); + cs->cset->fill(cs, (char *)to+length,diff,fill_char); } - break; + break; } case INT_RESULT: { diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index 807b6e35a29..c84f0da0d25 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -44,6 +44,16 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked) } } ref_length= sizeof(HEAP_PTR); + if (file) + { + /* Initialize variables for the opened table */ + btree_keys.clear_all(); + for (uint i= 0 ; i < table->keys ; i++) + { + if (table->key_info[i].algorithm == HA_KEY_ALG_BTREE) + btree_keys.set_bit(i); + } + } return (file ? 
0 : 1); } diff --git a/sql/ha_heap.h b/sql/ha_heap.h index c369c7029b4..feadc0c3c0f 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -26,6 +26,7 @@ class ha_heap: public handler { HP_INFO *file; + key_map btree_keys; public: ha_heap(TABLE *table): handler(table), file(0) {} @@ -49,6 +50,7 @@ class ha_heap: public handler (HA_ONLY_WHOLE_INDEX | HA_WRONG_ASCII_ORDER | HA_NOT_READ_PREFIX_LAST)); } + const key_map *keys_to_use_for_scanning() { return &btree_keys; } uint max_record_length() const { return HA_MAX_REC_LENGTH; } uint max_keys() const { return MAX_KEY; } uint max_key_parts() const { return MAX_REF_PARTS; } diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index ee55cfda6c1..2bde29ecc47 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -218,40 +218,80 @@ static uint16 big5strokexfrm(uint16 i) return 0xA140; } -static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) + + +static int my_strnncoll_big5_internal(const uchar **a_res, + const uchar **b_res, uint length) { - uint len; + const char *a= *a_res, *b= *b_res; - len = min(len1,len2); - while (len--) + while (length--) { - if ((len > 0) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1))) + if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1))) { - if (*s1 != *s2 || *(s1+1) != *(s2+1)) - return ((int) big5code(*s1,*(s1+1)) - - (int) big5code(*s2,*(s2+1))); - s1 +=2; - s2 +=2; - len--; - } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++]) - return ((int) sort_order_big5[(uchar) s1[-1]] - - (int) sort_order_big5[(uchar) s2[-1]]); + if (*a != *b || *(a+1) != *(b+1)) + return ((int) big5code(*a,*(a+1)) - + (int) big5code(*b,*(b+1))); + a+= 2; + b+= 2; + length--; + } + else if (sort_order_big5[(uchar) *a++] != + sort_order_big5[(uchar) *b++]) + return ((int) sort_order_big5[(uchar) a[-1]] - + (int) sort_order_big5[(uchar) b[-1]]); } - return (int) (len1-len2); + *a_res= 
a; + *b_res= b; + return 0; } -static -int my_strnncollsp_big5(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + +/* Compare strings */ + +static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_big5(cs,s,slen,t,tlen); + uint length= min(a_length, b_length); + int res= my_strnncoll_big5_internal(&a, &b, length); + return res ? res : (int) (a_length - b_length); } + +/* compare strings, ignore end space */ + +static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + uint length= min(a_length, b_length); + int res= my_strnncoll_big5_internal(&a, &b, length); + if (!res && a_length != b_length) + { + const uchar *end; + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. 
+ */ + if (a_length < b_length) + { + /* put shorter key in a */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; +} + + static int my_strnxfrm_big5(CHARSET_INFO *cs __attribute__((unused)), uchar * dest, uint len, const uchar * src, uint srclen) @@ -377,7 +417,7 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)), *min_length= (uint) (min_str-min_org); *max_length= res_length; do { - *min_str++ = ' '; /* Because if key compression */ + *min_str++ = 0; *max_str++ = max_sort_char; } while (min_str != min_end); return 0; diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index ed8c0b5b415..5094a7c45da 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -165,169 +165,144 @@ static struct wordvalue doubles[] = { Na konci připojíme znak 0 */ -#define ADD_TO_RESULT(dest, len, totlen, value) \ - if ((totlen) < (len)) { dest[totlen] = value; } (totlen++); - -#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \ - while (1) /* we will make a loop */ \ - { \ - if (IS_END(p, src, len)) \ - /* when we are at the end of string */ \ - { /* return either 0 for end of string */ \ - /* or 1 for end of pass */ \ - if (pass == 3) { value = 0; break; } \ - if (pass == 0) p = store; \ - else p = src; \ - value = 1; pass++; break; \ - } \ - /* not at end of string */ \ - value = CZ_SORT_TABLE[pass][*p]; \ - \ - if (value == 0) { p++; continue; } /* ignore value */ \ - if (value == 2) /* space */ \ - { \ - const uchar * tmp; \ - const uchar * runner = ++p; \ - while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \ - runner++; /* skip all spaces */ \ - if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \ - p = runner; \ - if ((pass <= 2) && !(IS_END(runner, src, len))) \ - p = runner; \ - if (IS_END(p, src, len)) \ - continue; \ - /* we switch passes */ \ - if 
(pass > 1) \ - break; \ - tmp = p; \ - if (pass == 0) pass = 1; \ - else pass = 0; \ - p = store; store = tmp; \ - break; \ - } \ - if (value == 255) \ - { \ - int i; \ - for (i = 0; i < (int) sizeof(doubles); i++) \ - { \ - const char * pattern = doubles[i].word; \ - const char * q = (const char *) p; \ - int j = 0; \ - while (pattern[j]) \ - { \ - if (IS_END(q, src, len) || (*q != pattern[j])) \ - { break ; } \ - j++; q++; \ - } \ - if (!(pattern[j])) \ - { \ - value = (int)(doubles[i].outvalue[pass]); \ - p = (const uchar *) q - 1; \ - break; \ - } \ - } \ - } \ - p++; \ - break; \ - } - -#define IS_END(p, src, len) (!(*p)) - -#if 0 -/* Function strcoll, with Czech sorting, for zero terminated strings */ -static int my_strcoll_czech(const uchar * s1, const uchar * s2) - { - int v1, v2; - const uchar * p1, * p2, * store1, * store2; - int pass1 = 0, pass2 = 0; - int diff; - - p1 = s1; p2 = s2; - store1 = s1; store2 = s2; - - do - { - NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, 0); - NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, 0); - diff = v1 - v2; - if (diff != 0) return diff; - } - while (v1); - return 0; - } -#endif +#define ADD_TO_RESULT(dest, len, totlen, value) \ +if ((totlen) < (len)) { dest[totlen] = value; } (totlen++); +#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len)) -#if 0 -/* Function strxfrm, with Czech sorting, for zero terminated strings */ -static int my_strxfrm_czech(uchar * dest, const uchar * src, int len) +#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \ +while (1) \ +{ \ + if (IS_END(p, src, len)) \ + { \ + /* when we are at the end of string */ \ + /* return either 0 for end of string */ \ + /* or 1 for end of pass */ \ + value= 0; \ + if (pass != 3) \ + { \ + p= (pass++ == 0) ? 
store : src; \ + value = 1; \ + } \ + break; \ + } \ + /* not at end of string */ \ + value = CZ_SORT_TABLE[pass][*p]; \ + if (value == 0) \ + { p++; continue; } /* ignore value */ \ + if (value == 2) /* space */ \ + { \ + const uchar * tmp; \ + const uchar * runner = ++p; \ + while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \ + runner++; /* skip all spaces */ \ + if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \ + p = runner; \ + if ((pass <= 2) && !(IS_END(runner, src, len))) \ + p = runner; \ + if (IS_END(p, src, len)) \ + continue; \ + /* we switch passes */ \ + if (pass > 1) \ + break; \ + tmp = p; \ + pass= 1-pass; \ + p = store; store = tmp; \ + break; \ + } \ + if (value == 255) \ + { \ + int i; \ + for (i = 0; i < (int) sizeof(doubles); i++) \ + { \ + const char * pattern = doubles[i].word; \ + const char * q = (const char *) p; \ + int j = 0; \ + while (pattern[j]) \ + { \ + if (IS_END(q, src, len) || (*q != pattern[j])) \ + break; \ + j++; q++; \ + } \ + if (!(pattern[j])) \ + { \ + value = (int)(doubles[i].outvalue[pass]); \ + p= (const uchar *) q - 1; \ + break; \ + } \ + } \ + } \ + p++; \ + break; \ +} + +/* + Function strnncoll, actually strcoll, with Czech sorting, which expect + the length of the strings being specified +*/ + +static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)), + const uchar * s1, uint len1, + const uchar * s2, uint len2) { - int value; - const uchar * p, * store; - int pass = 0; - int totlen = 0; - p = store = src; - - do - { - NEXT_CMP_VALUE(src, p, store, pass, value, 0); - ADD_TO_RESULT(dest, len, totlen, value); - } - while (value); - return totlen; - } -#endif + int v1, v2; + const uchar * p1, * p2, * store1, * store2; + int pass1 = 0, pass2 = 0; + p1 = s1; p2 = s2; + store1 = s1; store2 = s2; -#undef IS_END + do + { + int diff; + NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1); + NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2); + if ((diff = v1 - v2)) + return 
diff; + } + while (v1); + return 0; +} -#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len)) -/* Function strnncoll, actually strcoll, with Czech sorting, which expect - the length of the strings being specified */ -static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) - { - int v1, v2; - const uchar * p1, * p2, * store1, * store2; - int pass1 = 0, pass2 = 0; - int diff; - - p1 = s1; p2 = s2; - store1 = s1; store2 = s2; - - do - { - NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1); - NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2); - diff = v1 - v2; - - if (diff != 0) return diff; - } - while (v1); - return 0; - } - -/* Function strnxfrm, actually strxfrm, with Czech sorting, which expect - the length of the strings being specified */ + +/* + TODO: Fix this one to compare strings as they are done in ctype-simple1 +*/ + +static +int my_strnncollsp_czech(CHARSET_INFO * cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + for ( ; slen && s[slen-1] == ' ' ; slen--); + for ( ; tlen && t[tlen-1] == ' ' ; tlen--); + return my_strnncoll_czech(cs,s,slen,t,tlen); +} + + +/* + Function strnxfrm, actually strxfrm, with Czech sorting, which expect + the length of the strings being specified +*/ + static int my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)), - uchar * dest, uint len, - const uchar * src, uint srclen) - { - int value; - const uchar * p, * store; - int pass = 0; - int totlen = 0; - p = src; store = src; - - do - { - NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen); - ADD_TO_RESULT(dest, (int)len, totlen, value); - } - while (value); - return totlen; - } + uchar * dest, uint len, + const uchar * src, uint srclen) +{ + int value; + const uchar * p, * store; + int pass = 0; + int totlen = 0; + p = src; store = src; + + do + { + NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen); + ADD_TO_RESULT(dest, (int)len, totlen, value); + } + 
while (value); + return totlen; +} #undef IS_END @@ -595,16 +570,6 @@ static MY_UNI_IDX idx_uni_8859_2[]={ }; -static -int my_strnncollsp_czech(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) -{ - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_czech(cs,s,slen,t,tlen); -} - static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = { my_strnncoll_czech, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 98511406ba9..1990060e67b 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -2582,40 +2582,74 @@ static uint16 gbksortorder(uint16 i) } -int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) +int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res, + uint length) { - uint len,c1,c2; + const char *a= *a_res, *b= *b_res; + uint a_char,b_char; - len = min(len1,len2); - while (len--) + while (length--) { - if ((len > 0) && isgbkcode(*s1,*(s1+1)) && isgbkcode(*s2, *(s2+1))) + if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1))) { - c1=gbkcode(*s1,*(s1+1)); - c2=gbkcode(*s2,*(s2+1)); - if (c1!=c2) - return ((int) gbksortorder((uint16) c1) - - (int) gbksortorder((uint16) c2)); - s1+=2; - s2+=2; - --len; - } else if (sort_order_gbk[(uchar) *s1++] != sort_order_gbk[(uchar) *s2++]) - return ((int) sort_order_gbk[(uchar) s1[-1]] - - (int) sort_order_gbk[(uchar) s2[-1]]); + a_char= gbkcode(*a,*(a+1)); + b_char= gbkcode(*b,*(b+1)); + if (a_char != b_char) + return ((int) gbksortorder((uint16) a_char) - + (int) gbksortorder((uint16) b_char)); + a+= 2; + b+= 2; + length--; + } + else if (sort_order_gbk[(uchar) *a++] != sort_order_gbk[(uchar) *b++]) + return ((int) sort_order_gbk[(uchar) a[-1]] - + (int) sort_order_gbk[(uchar) b[-1]]); } - return (int) (len1-len2); + *a_res= a; + *b_res= b; + return 0; } -static -int my_strnncollsp_gbk(CHARSET_INFO * cs, - const 
uchar *s, uint slen, - const uchar *t, uint tlen) + + +int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_gbk(cs,s,slen,t,tlen); + uint length= min(a_length, b_length); + int res= my_strnncoll_gbk_internal(&a, &b, length); + return res ? res : (int) (a_length - b_length); +} + + +static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + uint length= min(a_length, b_length); + int res= my_strnncoll_gbk_internal(&a, &b, length); + if (!res && a_length != b_length) + { + const uchar *end; + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in a */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; } @@ -2696,7 +2730,7 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)), *min_length= (uint) (min_str - min_org); *max_length= res_length; do { - *min_str++ = '\0'; /* Because if key compression */ + *min_str++= 0; *max_str++ = max_sort_char; } while (min_str != min_end); return 0; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 0682b15d135..7a010c3bef8 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -319,51 +319,105 @@ uchar combo2map[]={ static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s1, uint len1, - const uchar *s2, uint len2) + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - const uchar *e1 = s1 + len1; - const uchar *e2 = s2 + len2; - uchar c1, c12=0, c2, c22=0; + const uchar *a_end= a 
+ a_length; + const uchar *b_end= b + b_length; + uchar a_char, a_extend= 0, b_char, b_extend= 0; - while ((s1 < e1 || c12) && (s2 < e2 || c22)) + while ((a < a_end || a_extend) && (b < b_end || b_extend)) { - if (c12) + if (a_extend) { - c1=c12; c12=0; + a_char=a_extend; a_extend=0; } else { - c12=combo2map[*s1]; - c1=combo1map[*s1++]; + a_extend=combo2map[*a]; + a_char=combo1map[*a++]; } - if (c22) + if (b_extend) { - c2=c22; c22=0; + b_char=b_extend; b_extend=0; } else { - c22=combo2map[*s2]; - c2=combo1map[*s2++]; + b_extend=combo2map[*b]; + b_char=combo1map[*b++]; } - if (c1 != c2) return (int)c1 - (int)c2; + if (a_char != b_char) + return (int) a_char - (int) b_char; } - /* A simple test of string lengths won't work -- we test to see which string ran out first */ - return (s1 < e1 || c12) ? 1 : (s2 < e2 || c22) ? -1 : 0; + return ((a < a_end || a_extend) ? 1 : + (b < b_end || b_extend) ? -1 : 0); } -static int my_strnncollsp_latin1_de(CHARSET_INFO *cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) +static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_latin1_de(cs,s,slen,t,tlen); + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + uchar a_char, a_extend= 0, b_char, b_extend= 0; + + while ((a < a_end || a_extend) && (b < b_end || b_extend)) + { + if (a_extend) + { + a_char=a_extend; + a_extend= 0; + } + else + { + a_extend= combo2map[*a]; + a_char= combo1map[*a++]; + } + if (b_extend) + { + b_char= b_extend; + b_extend= 0; + } + else + { + b_extend= combo2map[*b]; + b_char= combo1map[*b++]; + } + if (a_char != b_char) + return (int) a_char - (int) b_char; + } + /* Check if double character last */ + if (a_extend) + return 1; + if (b_extend) + return -1; + + if (a != a_end || b != b_end) + { + int swap= 0; + /* + Check the 
next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a == a_end) + { + /* put shorter key in a */ + a_end= b_end; + a= b; + swap= -1; /* swap sign of result */ + } + for ( ; a < a_end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } @@ -385,6 +439,32 @@ static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)), } +void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, uint len, + ulong *nr1, ulong *nr2) +{ + const uchar *end= key+len; + /* + Remove end space. We have to do this to be able to compare + 'AE' and 'Ä' as identical + */ + while (end > key && end[-1] == ' ') + end--; + + for (; key < end ; key++) + { + uint X= (uint) combo1map[(uint) *key]; + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); + nr2[0]+=3; + if ((X= combo2map[*key])) + { + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); + nr2[0]+=3; + } + } +} + + static MY_COLLATION_HANDLER my_collation_german2_ci_handler= { my_strnncoll_latin1_de, @@ -394,7 +474,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler= my_wildcmp_8bit, my_strcasecmp_8bit, my_instr_simple, - my_hash_sort_simple + my_hash_sort_latin1_de }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 2f7cf698664..ed772a68845 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -347,6 +347,7 @@ uint my_instr_mb(CHARSET_INFO *cs, return 0; } + /* BINARY collations handlers for MB charsets */ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), @@ -357,20 +358,6 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), return cmp ? 
cmp : (int) (slen - tlen); } -static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen) -{ - int len, cmp; - - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - - len = ( slen > tlen ) ? tlen : slen; - - cmp= memcmp(s,t,len); - return cmp ? cmp : (int) (slen - tlen); -} static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)), uchar * dest, uint len, @@ -526,7 +513,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs, MY_COLLATION_HANDLER my_collation_mb_bin_handler = { my_strnncoll_mb_bin, - my_strnncollsp_mb_bin, + my_strnncoll_mb_bin, my_strnxfrm_mb_bin, my_like_range_simple, my_wildcmp_mb_bin, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 0aae60a0b56..c8eb3c07a3f 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -60,25 +60,69 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen, } -int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *s, uint slen, - const uchar *t, uint tlen) +/* + Compare strings, discarding end space + + SYNOPSIS + my_strnncollsp_simple() + cs character set handler + a First string to compare + a_length Length of 'a' + b Second string to compare + b_length Length of 'b' + + IMPLEMENTATION + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. + + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + RETURN + < 0 a < b + = 0 a == b + > 0 a > b +*/ + +int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length, + const uchar *b, uint b_length) { - uchar *map= cs->sort_order; - int len; - - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - - len = ( slen > tlen ) ? 
tlen : slen; - - while (len--) + const uchar *map= cs->sort_order, *end; + uint length; + + end= a + (length= min(a_length, b_length)); + while (a < end) { - if (map[*s++] != map[*t++]) - return ((int) map[s[-1]] - (int) map[t[-1]]); + if (map[*a++] != map[*b++]) + return ((int) map[a[-1]] - (int) map[b[-1]]); } - return (int) (slen-tlen); + if (a_length != b_length) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in s */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } + void my_caseup_str_8bit(CHARSET_INFO * cs,char *str) { register uchar *map=cs->to_upper; @@ -169,8 +213,8 @@ int my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)), void my_hash_sort_simple(CHARSET_INFO *cs, - const uchar *key, uint len, - ulong *nr1, ulong *nr2) + const uchar *key, uint len, + ulong *nr1, ulong *nr2) { register uchar *sort_order=cs->sort_order; const uchar *pos = key; @@ -953,9 +997,10 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, { *min_length= (uint) (min_str - min_org); *max_length=res_length; - do { - *min_str++ = ' '; /* Because if key compression */ - *max_str++ = (char) cs->max_sort_char; + do + { + *min_str++= 0; + *max_str++= (char) cs->max_sort_char; } while (min_str != min_end); return 0; } @@ -963,13 +1008,6 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, } *min_length= *max_length = (uint) (min_str - min_org); - /* Temporary fix for handling w_one at end of string (key compression) */ - { - char *tmp; - for (tmp= min_str ; tmp > min_org && tmp[-1] == '\0';) - *--tmp=' '; - } - while (min_str != min_end) *min_str++ = *max_str++ = ' '; /* Because if key compression */ return 0; diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index c337b8122fb..91a24fa8bee 
100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -184,7 +184,7 @@ static uchar NEAR sort_order_sjis[]= static int ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) + const char* p, const char *e) { return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 2: 0); } @@ -197,59 +197,101 @@ static int mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c) #define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) -static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s1, uint len1, - const uchar *s2, uint len2) + +static int my_strnncoll_sjis_internal(CHARSET_INFO *cs, + const uchar **a_res, uint a_length, + const uchar **b_res, uint b_length) { - const uchar *e1 = s1 + len1; - const uchar *e2 = s2 + len2; - while (s1 < e1 && s2 < e2) { - if (ismbchar_sjis(cs,(char*) s1, (char*) e1) && - ismbchar_sjis(cs,(char*) s2, (char*) e2)) { - uint c1 = sjiscode(*s1, *(s1+1)); - uint c2 = sjiscode(*s2, *(s2+1)); - if (c1 != c2) - return c1 - c2; - s1 += 2; - s2 += 2; - } else { - if (sort_order_sjis[(uchar)*s1] != sort_order_sjis[(uchar)*s2]) - return sort_order_sjis[(uchar)*s1] - sort_order_sjis[(uchar)*s2]; - s1++; - s2++; + const uchar *a= *a_res, *b= *b_res; + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + while (a < a_end && b < b_end) + { + if (ismbchar_sjis(cs,(char*) a, (char*) a_end) && + ismbchar_sjis(cs,(char*) b, (char*) b_end)) + { + uint a_char= sjiscode(*a, *(a+1)); + uint b_char= sjiscode(*b, *(b+1)); + if (a_char != b_char) + return a_char - b_char; + a += 2; + b += 2; + } else + { + if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b]) + return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b]; + a++; + b++; } } - return len1 - len2; + *a_res= a; + *b_res= b; + return 0; +} + + +static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint 
b_length) +{ + int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); + return res ? res : (int) (a_length - b_length); } -static -int my_strnncollsp_sjis(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + +static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_sjis(cs,s,slen,t,tlen); + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); + if (!res && (a != a_end || b != b_end)) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a == a_end) + { + /* put shorter key in a */ + a_end= b_end; + a= b; + swap= -1; /* swap sign of result */ + } + for (; a < a_end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; } + + static int my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)), uchar *dest, uint len, const uchar *src, uint srclen) { uchar *d_end = dest + len; uchar *s_end = (uchar*) src + srclen; - while (dest < d_end && src < s_end) { - if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) { + while (dest < d_end && src < s_end) + { + if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) + { *dest++ = *src++; if (dest < d_end && src < s_end) *dest++ = *src++; - } else { - *dest++ = sort_order_sjis[(uchar)*src++]; } + else + *dest++ = sort_order_sjis[(uchar)*src++]; } return srclen; } + /* ** Calculate min_str and max_str that ranges a LIKE string. 
** Arguments: @@ -300,12 +342,14 @@ static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)), ptr++; continue; } - if (*ptr == w_many) { /* '%' in SQL */ + if (*ptr == w_many) + { /* '%' in SQL */ *min_length = (uint)(min_str - min_org); *max_length = res_length; - do { - *min_str++ = ' '; /* Because if key compression */ - *max_str++ = max_sort_char; + do + { + *min_str++= 0; + *max_str++= max_sort_char; } while (min_str < min_end); return 0; } diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 59be820863a..954a3768536 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -518,6 +518,10 @@ static uint thai2sortable(uchar *tstr, uint len) strncoll() replacement, compare 2 string, both are converted to sortable string + NOTE: + We can't cut strings at end \0 as this would break comparision with + LIKE characters, where the min range is stored as end \0 + Arg: 2 Strings and it compare length Ret: strcmp result */ @@ -530,9 +534,6 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)), uchar *tc1, *tc2; int i; - /* Cut strings at end \0 */ - len1= (int) strnlen((char*) s1,len1); - len2= (int) strnlen((char*) s2,len2); tc1= buf; if ((len1 + len2 +2) > (int) sizeof(buf)) tc1= (uchar*) malloc(len1+len2); @@ -550,6 +551,10 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)), } +/* + TODO: Has to be fixed like strnncollsp in ctype-simple.c +*/ + static int my_strnncollsp_tis620(CHARSET_INFO * cs, const uchar *s, uint slen, @@ -637,8 +642,9 @@ my_bool my_like_range_tis620(CHARSET_INFO *cs __attribute__((unused)), { *min_length= (uint) (min_str - min_org); *max_length=res_length; - do { - *min_str++ = ' '; /* Because of key compression */ + do + { + *min_str++ = 0; *max_str++ = max_sort_chr; } while (min_str != min_end); return 0; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index f4c1a22939a..7d32dcb1b61 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -90,8 +90,8 @@ 
static uchar to_upper_ucs2[] = { }; -static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s, const uchar *e) +static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) { if (s+2 > e) /* Need 2 characters */ return MY_CS_TOOFEW(0); @@ -100,8 +100,8 @@ static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), return 2; } -static int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) , - my_wc_t wc, uchar *r, uchar *e) +static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + my_wc_t wc, uchar *r, uchar *e) { if ( r+2 > e ) return MY_CS_TOOSMALL; @@ -128,13 +128,15 @@ static void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen) } } -static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2) + +static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, + ulong *n1, ulong *n2) { my_wc_t wc; int res; const uchar *e=s+slen; - while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) { int plane = (wc>>8) & 0xFF; wc = uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].sort : wc; @@ -148,7 +150,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), - char * s __attribute__((unused))) + char * s __attribute__((unused))) { } @@ -173,13 +175,14 @@ static void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen) } static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char * s __attribute__((unused))) + char * s __attribute__((unused))) { } static int my_strnncoll_ucs2(CHARSET_INFO *cs, - const uchar *s, uint slen, const uchar *t, uint tlen) + const uchar *s, uint slen, + const uchar *t, uint tlen) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -213,8 +216,9 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + static int my_strncasecmp_ucs2(CHARSET_INFO *cs, - const char *s, const char *t, uint len) + const char *s, const char *t, uint len) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -249,6 +253,7 @@ static int my_strncasecmp_ucs2(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) { uint s_len=strlen(s); @@ -257,6 +262,7 @@ static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) return my_strncasecmp_ucs2(cs, s, t, len); } + static int my_strnxfrm_ucs2(CHARSET_INFO *cs, uchar *dst, uint dstlen, const uchar *src, uint srclen) { @@ -288,6 +294,7 @@ static int my_strnxfrm_ucs2(CHARSET_INFO *cs, return dst - dst_orig; } + static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b __attribute__((unused)), const char *e __attribute__((unused))) @@ -295,6 +302,7 @@ static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), return 2; } + static int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , uint c __attribute__((unused))) { @@ -380,8 +388,8 @@ static int my_vsnprintf_ucs2(char *dst, uint n, const char* fmt, va_list ap) 
return (uint) (dst - start); } -static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)) - ,char* to, uint n, const char* fmt, ...) +static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)), + char* to, uint n, const char* fmt, ...) { va_list args; va_start(args,fmt); @@ -389,9 +397,9 @@ static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)) } -long my_strntol_ucs2(CHARSET_INFO *cs, - const char *nptr, uint l, int base, - char **endptr, int *err) +long my_strntol_ucs2(CHARSET_INFO *cs, + const char *nptr, uint l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -504,9 +512,9 @@ bs: } -ulong my_strntoul_ucs2(CHARSET_INFO *cs, - const char *nptr, uint l, int base, - char **endptr, int *err) +ulong my_strntoul_ucs2(CHARSET_INFO *cs, + const char *nptr, uint l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -1334,8 +1342,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, *min_length= (uint) (min_str - min_org); *max_length=res_length; do { - *min_str++ = '\0'; - *min_str++ = ' '; /* Because if key compression */ + *min_str++ = 0; + *min_str++ = 0; *max_str++ = (char) cs->max_sort_char >>8; *max_str++ = (char) cs->max_sort_char & 255; } while (min_str + 1 < min_end); diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index dca73e5a79f..886ecfbd0c9 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1801,7 +1801,8 @@ static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s) static int my_strnncoll_utf8(CHARSET_INFO *cs, - const uchar *s, uint slen, const uchar *t, uint tlen) + const uchar *s, uint slen, + const uchar *t, uint tlen) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -1835,6 +1836,11 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + +/* + TODO: Has to be fixed as strnncollsp in ctype-simple +*/ + static int my_strnncollsp_utf8(CHARSET_INFO * cs, const uchar *s, uint slen, diff --git a/strings/ctype-win1250ch.c 
b/strings/ctype-win1250ch.c index bda349f1988..8fd4e612713 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -467,6 +467,10 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)), } +/* + TODO: Has to be fixed as strnncollsp in ctype-simple +*/ + static int my_strnncollsp_win1250ch(CHARSET_INFO * cs, const uchar *s, uint slen, |