diff options
-rw-r--r-- | mysql-test/r/ctype_ucs.result | 127 | ||||
-rw-r--r-- | mysql-test/t/ctype_ucs.test | 46 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 263 |
3 files changed, 428 insertions, 8 deletions
diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index ee71ffff2f0..2a0b17df043 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -13,6 +13,133 @@ hex(word) 0420 2004 DROP TABLE t1; +SET NAMES koi8r; +SET character_set_connection=ucs2; +create table t1 (a varchar(10) character set ucs2, key(a)); +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 21 NULL 1 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 21 NULL 1 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2); +INSERT INTO t1 VALUES ('ÆÙ×Á'),('æÙ×Á'),('Æù×Á'),('ÆÙ÷Á'),('ÆÙ×á'),('æù÷á'); +INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏÌÄÖ'),('æÙ×ÁÐÒÏÌÄÖ'),('Æù×ÁÐÒÏÌÄÖ'),('ÆÙ÷ÁÐÒÏÌÄÖ'); +INSERT INTO t1 VALUES ('ÆÙ×áÐÒÏÌÄÖ'),('ÆÙ×ÁðÒÏÌÄÖ'),('ÆÙ×ÁÐòÏÌÄÖ'),('ÆÙ×ÁÐÒïÌÄÖ'); +INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏìÄÖ'),('ÆÙ×ÁÐÒÏÌäÖ'),('ÆÙ×ÁÐÒÏÌÄö'),('æù÷áðòïìäö'); +SELECT * FROM t1 WHERE a LIKE '%Æù×Á%'; +a +ÆÙ×Á +æÙ×Á +Æù×Á +ÆÙ÷Á +ÆÙ×á +æù÷á +ÆÙ×ÁÐÒÏÌÄÖ +æÙ×ÁÐÒÏÌÄÖ +Æù×ÁÐÒÏÌÄÖ +ÆÙ÷ÁÐÒÏÌÄÖ +ÆÙ×áÐÒÏÌÄÖ +ÆÙ×ÁðÒÏÌÄÖ +ÆÙ×ÁÐòÏÌÄÖ +ÆÙ×ÁÐÒïÌÄÖ +ÆÙ×ÁÐÒÏìÄÖ +ÆÙ×ÁÐÒÏÌäÖ +ÆÙ×ÁÐÒÏÌÄö +æù÷áðòïìäö +SELECT * FROM t1 WHERE a LIKE '%Æù×%'; +a +ÆÙ×Á +æÙ×Á +Æù×Á +ÆÙ÷Á +ÆÙ×á +æù÷á +ÆÙ×ÁÐÒÏÌÄÖ +æÙ×ÁÐÒÏÌÄÖ +Æù×ÁÐÒÏÌÄÖ +ÆÙ÷ÁÐÒÏÌÄÖ +ÆÙ×áÐÒÏÌÄÖ +ÆÙ×ÁðÒÏÌÄÖ +ÆÙ×ÁÐòÏÌÄÖ +ÆÙ×ÁÐÒïÌÄÖ +ÆÙ×ÁÐÒÏìÄÖ +ÆÙ×ÁÐÒÏÌäÖ +ÆÙ×ÁÐÒÏÌÄö +æù÷áðòïìäö +SELECT * FROM t1 WHERE a LIKE 'Æù×Á%'; +a +ÆÙ×Á +æÙ×Á +Æù×Á +ÆÙ÷Á +ÆÙ×á +æù÷á +ÆÙ×ÁÐÒÏÌÄÖ +æÙ×ÁÐÒÏÌÄÖ +Æù×ÁÐÒÏÌÄÖ +ÆÙ÷ÁÐÒÏÌÄÖ +ÆÙ×áÐÒÏÌÄÖ +ÆÙ×ÁðÒÏÌÄÖ +ÆÙ×ÁÐòÏÌÄÖ +ÆÙ×ÁÐÒïÌÄÖ +ÆÙ×ÁÐÒÏìÄÖ +ÆÙ×ÁÐÒÏÌäÖ +ÆÙ×ÁÐÒÏÌÄö +æù÷áðòïìäö +SELECT * FROM t1 WHERE a LIKE 'Æù×Á%' COLLATE ucs2_bin; +a +Æù×Á +Æù×ÁÐÒÏÌÄÖ +DROP TABLE t1; +CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word)) +TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci; +INSERT INTO t1 (word) VALUES ("cat"); +SELECT * FROM t1 WHERE word LIKE "c%"; +word +cat +SELECT * FROM t1 WHERE word LIKE "ca_"; +word +cat +SELECT * FROM t1 WHERE word LIKE "cat"; +word +cat +SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025'; +word +cat +SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F'; +word +cat +DROP TABLE t1; +SET NAMES latin1; CREATE TABLE t1 ( word VARCHAR(64), bar INT(11) default 0, diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index ee30baed008..80ae70c0fe2 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -29,6 +29,51 @@ INSERT INTO t1 VALUES (X'042000200020'), (X'200400200020'); SELECT hex(word) FROM t1 ORDER BY word; DROP TABLE t1; +###################################################### +# +# Test of like +# + +SET NAMES koi8r; +SET character_set_connection=ucs2; + +create table t1 (a varchar(10) character set ucs2, key(a)); +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +explain select * from t1 where a like concat('abc','%'); +select * from t1 where a like "abc%"; +select * from t1 where a like concat("abc","%"); +select * from t1 where a like "ABC%"; +select * from t1 where a like "test%"; +select * from t1 where a like "te_t"; +select * from t1 where a like "%a%"; +select * from t1 where a like "%abcd%"; +select * from t1 where a like "%abc\d%"; +drop table t1; + +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2); +INSERT INTO t1 VALUES ('ÆÙ×Á'),('æÙ×Á'),('Æù×Á'),('ÆÙ÷Á'),('ÆÙ×á'),('æù÷á'); +INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏÌÄÖ'),('æÙ×ÁÐÒÏÌÄÖ'),('Æù×ÁÐÒÏÌÄÖ'),('ÆÙ÷ÁÐÒÏÌÄÖ'); +INSERT INTO t1 VALUES ('ÆÙ×áÐÒÏÌÄÖ'),('ÆÙ×ÁðÒÏÌÄÖ'),('ÆÙ×ÁÐòÏÌÄÖ'),('ÆÙ×ÁÐÒïÌÄÖ'); +INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏìÄÖ'),('ÆÙ×ÁÐÒÏÌäÖ'),('ÆÙ×ÁÐÒÏÌÄö'),('æù÷áðòïìäö'); +SELECT * FROM t1 WHERE a LIKE '%Æù×Á%'; +SELECT * FROM t1 WHERE a LIKE '%Æù×%'; +SELECT * FROM t1 WHERE a LIKE 'Æù×Á%'; +SELECT * FROM t1 WHERE a LIKE 'Æù×Á%' COLLATE ucs2_bin; +DROP TABLE t1; + +# +# Bug 1181 +# +CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word)) +TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci; +INSERT INTO t1 (word) VALUES ("cat"); +SELECT * FROM t1 WHERE word LIKE "c%"; +SELECT * FROM t1 WHERE word LIKE "ca_"; +SELECT * FROM t1 WHERE word LIKE "cat"; +SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025'; +SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F'; +DROP TABLE t1; ###################################################### @@ -53,6 +98,7 @@ DROP TABLE t1; # are not part of the index sorted on, it does a filesort, which fails. # Using a straight index yields correct results. +SET NAMES latin1; # # Two fields, index diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 31c0f063529..cdcd91b2916 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -90,7 +90,7 @@ static uchar to_upper_ucs2[] = { }; -static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) , +static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), my_wc_t * pwc, const uchar *s, const uchar *e) { if (s+2 > e) /* Need 2 characters */ @@ -1018,21 +1018,268 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), return (uint) (end-ptr); } +/* +** Compare string against string with wildcard +** 0 if matched +** -1 if not matched with wildcard +** 1 if matched with wildcard +*/ + +static +int my_wildcmp_ucs2(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many, + MY_UNICASE_INFO **weights) +{ + int result= -1; /* Not found, using wildcards */ + my_wc_t s_wc, w_wc; + int scan, plane; + + while (wildstr != wildend) + { + + while (1) + { + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t)escape) + { + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + } + + if (w_wc == (my_wc_t)w_many) + { + result= 1; /* Found an anchor char */ + break; + } + + scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); + if (scan <=0) + return 1; + str+= scan; + + if (w_wc == (my_wc_t)w_one) + { + result= 1; /* Found an anchor char */ + } + else + { + if (weights) + { + plane=(s_wc>>8) & 0xFF; + s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; + plane=(w_wc>>8) & 0xFF; + w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; + } + if (s_wc != w_wc) + return 1; /* No match */ + } + if (wildstr == wildend) + return (str != str_end); /* Match if both are at end */ + } + + + if (w_wc == (my_wc_t)w_many) + { /* Found w_many */ + + /* Remove any '%' and '_' from the wild search string */ + for ( ; wildstr != wildend ; ) + { + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t)w_many) + continue; + + if (w_wc == (my_wc_t)w_one) + { + scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); + if (scan <=0) + return 1; + str+= scan; + continue; + } + break; /* Not a wild character */ + } + + if (wildstr == wildend) + return 0; /* Ok if w_many is last */ + + if (str == str_end) + return -1; + + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t)escape) + { + scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); + if (scan <= 0) + return 1; + wildstr+= scan; + } + + do + { + /* Skip until the first character from wildstr is found */ + while (str != str_end) + { + scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str, (const uchar*)str_end); + if (scan <= 0) + return 1; + str+= scan; + + if (weights) + { + plane=(s_wc>>8) & 0xFF; + s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; + plane=(w_wc>>8) & 0xFF; + w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; + } + + if (s_wc == w_wc) + break; + } + if (str == str_end) + return -1; + + result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape, + w_one,w_many,weights); + if (result <= 0) + return result; + + } while (str != str_end && w_wc != (my_wc_t)w_many); + return -1; + } + } + return (str != str_end ? 1 : 0); +} -static MY_COLLATION_HANDLER my_collation_ci_handler = +static +int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, + escape,w_one,w_many,uni_plane); +} + +static +int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, + escape,w_one,w_many,NULL); +} + + +static +int my_strnncoll_ucs2_bin(CHARSET_INFO *cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + + while ( s < se && t < te ) + { + s_res=my_ucs2_uni(cs,&s_wc, s, se); + t_res=my_ucs2_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + return ( (se-s) - (te-t) ); +} + +static +int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t) +{ + uint s_len=strlen(s); + uint t_len=strlen(t); + uint len = (s_len > t_len) ? s_len : t_len; + return my_strncasecmp_ucs2(cs, s, t, len); +} + +static +int my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), + uchar *dst, uint dstlen, + const uchar *src, uint srclen) +{ + if (dst != src) + memcpy(dst,src,srclen= min(dstlen,srclen)); + return srclen; +} + +static +void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, uint len,ulong *nr1, ulong *nr2) +{ + const uchar *pos = key; + + key+= len; + + for (; pos < (uchar*) key ; pos++) + { + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * + ((uint)*pos)) + (nr1[0] << 8); + nr2[0]+=3; + } +} + + +static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { my_strnncoll_ucs2, my_strnncoll_ucs2, my_strnxfrm_ucs2, my_like_range_simple, - my_wildcmp_mb, + my_wildcmp_ucs2_ci, my_strcasecmp_ucs2, my_instr_mb, my_hash_sort_ucs2 }; -static MY_CHARSET_HANDLER my_charset_handler= +static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = +{ + my_strnncoll_ucs2_bin, + my_strnncoll_ucs2_bin, + my_strnxfrm_ucs2_bin, + my_like_range_simple, + my_wildcmp_ucs2_bin, + my_strcasecmp_ucs2_bin, + my_instr_mb, + my_hash_sort_ucs2_bin +}; + +static MY_CHARSET_HANDLER my_charset_ucs2_handler= { my_ismbchar_ucs2, /* ismbchar */ my_mbcharlen_ucs2, /* mbcharlen */ @@ -1077,8 +1324,8 @@ CHARSET_INFO my_charset_ucs2_general_ci= 1, /* strxfrm_multiply */ 2, /* mbmaxlen */ 0, - &my_charset_handler, - &my_collation_ci_handler + &my_charset_ucs2_handler, + &my_collation_ucs2_general_ci_handler }; @@ -1100,8 +1347,8 @@ CHARSET_INFO my_charset_ucs2_bin= 1, /* strxfrm_multiply */ 2, /* mbmaxlen */ 0, - &my_charset_handler, - &my_collation_mb_bin_handler + &my_charset_ucs2_handler, + &my_collation_ucs2_bin_handler }; |