summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- mysql-test/r/ctype_ucs.result 127
-rw-r--r-- mysql-test/t/ctype_ucs.test 46
-rw-r--r-- strings/ctype-ucs2.c 263
3 files changed, 428 insertions, 8 deletions
diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
index ee71ffff2f0..2a0b17df043 100644
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -13,6 +13,133 @@ hex(word)
0420
2004
DROP TABLE t1;
+SET NAMES koi8r;
+SET character_set_connection=ucs2;
+create table t1 (a varchar(10) character set ucs2, key(a));
+insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
+explain select * from t1 where a like 'abc%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 21 NULL 1 Using where; Using index
+explain select * from t1 where a like concat('abc','%');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 21 NULL 1 Using where; Using index
+select * from t1 where a like "abc%";
+a
+abc
+abcd
+select * from t1 where a like concat("abc","%");
+a
+abc
+abcd
+select * from t1 where a like "ABC%";
+a
+abc
+abcd
+select * from t1 where a like "test%";
+a
+test
+select * from t1 where a like "te_t";
+a
+test
+select * from t1 where a like "%a%";
+a
+select * from t1 where a like "%abcd%";
+a
+abcd
+select * from t1 where a like "%abc\d%";
+a
+abcd
+drop table t1;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2);
+INSERT INTO t1 VALUES ('ÆÙ×Á'),('æÙ×Á'),('Æù×Á'),('ÆÙ÷Á'),('ÆÙ×á'),('æù÷á');
+INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏÌÄÖ'),('æÙ×ÁÐÒÏÌÄÖ'),('Æù×ÁÐÒÏÌÄÖ'),('ÆÙ÷ÁÐÒÏÌÄÖ');
+INSERT INTO t1 VALUES ('ÆÙ×áÐÒÏÌÄÖ'),('ÆÙ×ÁðÒÏÌÄÖ'),('ÆÙ×ÁÐòÏÌÄÖ'),('ÆÙ×ÁÐÒïÌÄÖ');
+INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏìÄÖ'),('ÆÙ×ÁÐÒÏÌäÖ'),('ÆÙ×ÁÐÒÏÌÄö'),('æù÷áðòïìäö');
+SELECT * FROM t1 WHERE a LIKE '%Æù×Á%';
+a
+ÆÙ×Á
+æÙ×Á
+Æù×Á
+ÆÙ÷Á
+ÆÙ×á
+æù÷á
+ÆÙ×ÁÐÒÏÌÄÖ
+æÙ×ÁÐÒÏÌÄÖ
+Æù×ÁÐÒÏÌÄÖ
+ÆÙ÷ÁÐÒÏÌÄÖ
+ÆÙ×áÐÒÏÌÄÖ
+ÆÙ×ÁðÒÏÌÄÖ
+ÆÙ×ÁÐòÏÌÄÖ
+ÆÙ×ÁÐÒïÌÄÖ
+ÆÙ×ÁÐÒÏìÄÖ
+ÆÙ×ÁÐÒÏÌäÖ
+ÆÙ×ÁÐÒÏÌÄö
+æù÷áðòïìäö
+SELECT * FROM t1 WHERE a LIKE '%Æù×%';
+a
+ÆÙ×Á
+æÙ×Á
+Æù×Á
+ÆÙ÷Á
+ÆÙ×á
+æù÷á
+ÆÙ×ÁÐÒÏÌÄÖ
+æÙ×ÁÐÒÏÌÄÖ
+Æù×ÁÐÒÏÌÄÖ
+ÆÙ÷ÁÐÒÏÌÄÖ
+ÆÙ×áÐÒÏÌÄÖ
+ÆÙ×ÁðÒÏÌÄÖ
+ÆÙ×ÁÐòÏÌÄÖ
+ÆÙ×ÁÐÒïÌÄÖ
+ÆÙ×ÁÐÒÏìÄÖ
+ÆÙ×ÁÐÒÏÌäÖ
+ÆÙ×ÁÐÒÏÌÄö
+æù÷áðòïìäö
+SELECT * FROM t1 WHERE a LIKE 'Æù×Á%';
+a
+ÆÙ×Á
+æÙ×Á
+Æù×Á
+ÆÙ÷Á
+ÆÙ×á
+æù÷á
+ÆÙ×ÁÐÒÏÌÄÖ
+æÙ×ÁÐÒÏÌÄÖ
+Æù×ÁÐÒÏÌÄÖ
+ÆÙ÷ÁÐÒÏÌÄÖ
+ÆÙ×áÐÒÏÌÄÖ
+ÆÙ×ÁðÒÏÌÄÖ
+ÆÙ×ÁÐòÏÌÄÖ
+ÆÙ×ÁÐÒïÌÄÖ
+ÆÙ×ÁÐÒÏìÄÖ
+ÆÙ×ÁÐÒÏÌäÖ
+ÆÙ×ÁÐÒÏÌÄö
+æù÷áðòïìäö
+SELECT * FROM t1 WHERE a LIKE 'Æù×Á%' COLLATE ucs2_bin;
+a
+Æù×Á
+Æù×ÁÐÒÏÌÄÖ
+DROP TABLE t1;
+CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
+TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci;
+INSERT INTO t1 (word) VALUES ("cat");
+SELECT * FROM t1 WHERE word LIKE "c%";
+word
+cat
+SELECT * FROM t1 WHERE word LIKE "ca_";
+word
+cat
+SELECT * FROM t1 WHERE word LIKE "cat";
+word
+cat
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025';
+word
+cat
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F';
+word
+cat
+DROP TABLE t1;
+SET NAMES latin1;
CREATE TABLE t1 (
word VARCHAR(64),
bar INT(11) default 0,
diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
index ee30baed008..80ae70c0fe2 100644
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -29,6 +29,51 @@ INSERT INTO t1 VALUES (X'042000200020'), (X'200400200020');
SELECT hex(word) FROM t1 ORDER BY word;
DROP TABLE t1;
+######################################################
+#
+# Test of like
+#
+
+SET NAMES koi8r;
+SET character_set_connection=ucs2;
+
+create table t1 (a varchar(10) character set ucs2, key(a));
+insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
+explain select * from t1 where a like 'abc%';
+explain select * from t1 where a like concat('abc','%');
+select * from t1 where a like "abc%";
+select * from t1 where a like concat("abc","%");
+select * from t1 where a like "ABC%";
+select * from t1 where a like "test%";
+select * from t1 where a like "te_t";
+select * from t1 where a like "%a%";
+select * from t1 where a like "%abcd%";
+select * from t1 where a like "%abc\d%";
+drop table t1;
+
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2);
+INSERT INTO t1 VALUES ('ÆÙ×Á'),('æÙ×Á'),('Æù×Á'),('ÆÙ÷Á'),('ÆÙ×á'),('æù÷á');
+INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏÌÄÖ'),('æÙ×ÁÐÒÏÌÄÖ'),('Æù×ÁÐÒÏÌÄÖ'),('ÆÙ÷ÁÐÒÏÌÄÖ');
+INSERT INTO t1 VALUES ('ÆÙ×áÐÒÏÌÄÖ'),('ÆÙ×ÁðÒÏÌÄÖ'),('ÆÙ×ÁÐòÏÌÄÖ'),('ÆÙ×ÁÐÒïÌÄÖ');
+INSERT INTO t1 VALUES ('ÆÙ×ÁÐÒÏìÄÖ'),('ÆÙ×ÁÐÒÏÌäÖ'),('ÆÙ×ÁÐÒÏÌÄö'),('æù÷áðòïìäö');
+SELECT * FROM t1 WHERE a LIKE '%Æù×Á%';
+SELECT * FROM t1 WHERE a LIKE '%Æù×%';
+SELECT * FROM t1 WHERE a LIKE 'Æù×Á%';
+SELECT * FROM t1 WHERE a LIKE 'Æù×Á%' COLLATE ucs2_bin;
+DROP TABLE t1;
+
+#
+# Bug 1181
+#
+CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
+TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci;
+INSERT INTO t1 (word) VALUES ("cat");
+SELECT * FROM t1 WHERE word LIKE "c%";
+SELECT * FROM t1 WHERE word LIKE "ca_";
+SELECT * FROM t1 WHERE word LIKE "cat";
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025';
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F';
+DROP TABLE t1;
######################################################
@@ -53,6 +98,7 @@ DROP TABLE t1;
# are not part of the index sorted on, it does a filesort, which fails.
# Using a straight index yields correct results.
+SET NAMES latin1;
#
# Two fields, index
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 31c0f063529..cdcd91b2916 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -90,7 +90,7 @@ static uchar to_upper_ucs2[] = {
};
-static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) ,
+static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
if (s+2 > e) /* Need 2 characters */
@@ -1018,21 +1018,268 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
return (uint) (end-ptr);
}
+/*
+** Compare string against string with wildcard
+**
+** Both str..str_end and wildstr..wildend are decoded one character at
+** a time with my_ucs2_uni().  escape, w_one and w_many are the code
+** points of the escape character, the one-character wildcard and the
+** any-sequence wildcard.  When weights is not NULL, both sides of each
+** literal comparison are first replaced by the .sort member of
+** weights[high byte][low byte]; a plane whose table pointer is NULL,
+** or weights == NULL, compares the code points as they are.
+**
+** Return values as implemented below:
+** 0 if matched
+** 1 if not matched: a literal differed, my_ucs2_uni() reported
+**   failure (scan <= 0), or the pattern ended before str did
+** -1 if not matched after a w_many wildcard was seen (str ran out or
+**   no candidate position matched); the recursive call site stops
+**   retrying on any result <= 0 and only continues on 1
+*/
+
+static
+int my_wildcmp_ucs2(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many,
+ MY_UNICASE_INFO **weights)
+{
+ int result= -1; /* Not found, using wildcards */
+ my_wc_t s_wc, w_wc;
+ int scan, plane; /* scan: bytes consumed by the last my_ucs2_uni() call */
+
+ while (wildstr != wildend)
+ {
+
+ while (1) /* Match literals and w_one up to the next w_many */
+ {
+ scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+ if (scan <= 0)
+ return 1;
+ wildstr+= scan;
+
+ if (w_wc == (my_wc_t)escape) /* Escape: w_wc becomes the next character */
+ {
+ scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+ if (scan <= 0)
+ return 1;
+ wildstr+= scan;
+ }
+
+ if (w_wc == (my_wc_t)w_many) /* NOTE(review): also true for an escaped w_many (same for w_one below) - confirm intended */
+ {
+ result= 1; /* Found an anchor char */
+ break;
+ }
+
+ scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
+ if (scan <=0) /* my_ucs2_uni() failed on str: report no match */
+ return 1;
+ str+= scan;
+
+ if (w_wc == (my_wc_t)w_one)
+ {
+ result= 1; /* Found an anchor char: s_wc is accepted without comparison */
+ }
+ else
+ {
+ if (weights) /* Map both characters through the weight table */
+ {
+ plane=(s_wc>>8) & 0xFF;
+ s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+ plane=(w_wc>>8) & 0xFF;
+ w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ }
+ if (s_wc != w_wc)
+ return 1; /* No match */
+ }
+ if (wildstr == wildend)
+ return (str != str_end); /* Match if both are at end */
+ }
+
+
+ if (w_wc == (my_wc_t)w_many)
+ { /* Found w_many */
+
+ /*
+   Remove any '%' and '_' from the wild search string: further
+   w_many are ignored, each w_one consumes one character of str.
+   NOTE(review): wildstr is advanced before the character is
+   classified, so the first non-wildcard character that ends this
+   loop has already been consumed.  If it was the last pattern
+   character, the "w_many is last" test below returns 0 without
+   comparing it; otherwise the read after that test fetches the
+   character following it (for the pattern "%a%" the search
+   character becomes the trailing w_many).  This looks unintended;
+   ctype_ucs.result in this patch appears to record no rows for
+   LIKE "%a%" - TODO confirm.
+ */
+ for ( ; wildstr != wildend ; )
+ {
+ scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+ if (scan <= 0)
+ return 1;
+ wildstr+= scan;
+
+ if (w_wc == (my_wc_t)w_many)
+ continue;
+
+ if (w_wc == (my_wc_t)w_one)
+ {
+ scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
+ if (scan <=0)
+ return 1;
+ str+= scan;
+ continue;
+ }
+ break; /* Not a wild character */
+ }
+
+ if (wildstr == wildend)
+ return 0; /* Ok if w_many is last */
+
+ if (str == str_end)
+ return -1; /* Pattern still has characters but str has none */
+
+ scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+ if (scan <= 0)
+ return 1;
+ wildstr+= scan; /* w_wc is now the character searched for in str */
+
+ if (w_wc == (my_wc_t)escape)
+ {
+ scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+ if (scan <= 0)
+ return 1;
+ wildstr+= scan;
+ }
+
+ do
+ {
+ /* Skip until the first character from wildstr is found */
+ while (str != str_end)
+ {
+ scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str, (const uchar*)str_end);
+ if (scan <= 0)
+ return 1;
+ str+= scan;
+
+ if (weights) /* w_wc is mapped again on every pass of this loop */
+ {
+ plane=(s_wc>>8) & 0xFF;
+ s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+ plane=(w_wc>>8) & 0xFF;
+ w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+ }
+
+ if (s_wc == w_wc)
+ break;
+ }
+ if (str == str_end) /* NOTE(review): also taken when the character just found was the last one of str - confirm */
+ return -1;
+
+ result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
+ w_one,w_many,weights); /* Match the rest of the pattern from here */
+ if (result <= 0) /* 0: matched, -1: give up; 1: try the next candidate */
+ return result;
+
+ } while (str != str_end && w_wc != (my_wc_t)w_many);
+ return -1;
+ }
+ }
+ return (str != str_end ? 1 : 0); /* Pattern consumed: match only if str is consumed too */
+}
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+static
+int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many)
+{ /* Wildcard comparison through the uni_plane weight table: forwards all
+     arguments to my_wildcmp_ucs2() and returns its result unchanged.
+     Installed in my_collation_ucs2_general_ci_handler.  uni_plane is
+     defined outside this hunk - presumably case-folding weights, verify. */
+ return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,uni_plane);
+}
+/*
+** Wildcard comparison on unmapped code points: forwards all arguments
+** to my_wildcmp_ucs2() with weights == NULL and returns its result
+** unchanged.  Installed in my_collation_ucs2_bin_handler.
+*/
+static
+int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
+ const char *str,const char *str_end,
+ const char *wildstr,const char *wildend,
+ int escape, int w_one, int w_many)
+{
+ return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
+ escape,w_one,w_many,NULL);
+}
+
+/*
+** Compare two buffers character by character without weight mapping.
+**
+** s/slen and t/tlen are byte buffers decoded with my_ucs2_uni().
+** Returns the difference of the first pair of code points that differ.
+** If every compared pair is equal, returns the difference of the byte
+** counts left over on each side (0 when both are exhausted).  If either
+** side fails to decode, returns the difference of the two current bytes.
+*/
+static
+int my_strnncoll_ucs2_bin(CHARSET_INFO *cs,
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
+{
+ int s_res,t_res;
+ my_wc_t s_wc,t_wc;
+ const uchar *se=s+slen;
+ const uchar *te=t+tlen;
+
+ while ( s < se && t < te )
+ {
+ s_res=my_ucs2_uni(cs,&s_wc, s, se);
+ t_res=my_ucs2_uni(cs,&t_wc, t, te);
+
+ if ( s_res <= 0 || t_res <= 0 )
+ {
+ /* Incorrect string, compare by char value */
+ return ((int)s[0]-(int)t[0]);
+ }
+ if ( s_wc != t_wc )
+ {
+ return ((int) s_wc) - ((int) t_wc);
+ }
+
+ s+=s_res;
+ t+=t_res;
+ }
+ return ( (se-s) - (te-t) ); /* Common prefix equal: the side with bytes left compares greater */
+}
+/*
+** strcasecmp entry of my_collation_ucs2_bin_handler: takes the larger
+** of strlen(s) and strlen(t) and returns the result of
+** my_strncasecmp_ucs2() over that many bytes.
+** NOTE(review): strlen() stops at the first zero byte, which looks
+** wrong for two-byte UCS-2 data - TODO confirm how callers use this.
+** NOTE(review): the longer length is passed on, so the shorter buffer
+** may be read past its terminator, depending on my_strncasecmp_ucs2()
+** (not visible here) - verify.
+** NOTE(review): a _bin handler delegating to a function named
+** "strncasecmp" may fold case - confirm this is intended.
+*/
+static
+int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t)
+{
+ uint s_len=strlen(s);
+ uint t_len=strlen(t);
+ uint len = (s_len > t_len) ? s_len : t_len;
+ return my_strncasecmp_ucs2(cs, s, t, len);
+}
+/*
+** strnxfrm entry of my_collation_ucs2_bin_handler: the transformed key
+** is the source bytes themselves.
+** When dst and src differ, copies min(dstlen,srclen) bytes and returns
+** that count.  When dst == src nothing is copied and srclen is returned
+** as passed in (not limited to dstlen).
+*/
+static
+int my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
+ uchar *dst, uint dstlen,
+ const uchar *src, uint srclen)
+{
+ if (dst != src)
+ memcpy(dst,src,srclen= min(dstlen,srclen));
+ return srclen;
+}
+/*
+** Fold every byte of key[0..len) into the running hash state held in
+** nr1[0] and nr2[0].  Bytes are used as they are, with no decoding or
+** weight mapping; nr2[0] grows by 3 for each byte processed.
+*/
+static
+void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *key, uint len,ulong *nr1, ulong *nr2)
+{
+ const uchar *pos = key;
+
+ key+= len; /* key now marks the end of the buffer */
+
+ for (; pos < (uchar*) key ; pos++)
+ {
+ nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
+ ((uint)*pos)) + (nr1[0] << 8);
+ nr2[0]+=3;
+ }
+}
+
+/* Collation handler table referenced by my_charset_ucs2_general_ci (renamed from my_collation_ci_handler) */
+static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
my_strnncoll_ucs2,
my_strnncoll_ucs2,
my_strnxfrm_ucs2,
my_like_range_simple,
- my_wildcmp_mb,
+ my_wildcmp_ucs2_ci, /* Wildcard matching now decodes with my_ucs2_uni() */
my_strcasecmp_ucs2,
my_instr_mb,
my_hash_sort_ucs2
};
-static MY_CHARSET_HANDLER my_charset_handler=
+static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
+{ /* Collation handler table referenced by my_charset_ucs2_bin, in place
+     of my_collation_mb_bin_handler.  Entries are in the same positions
+     as in my_collation_ucs2_general_ci_handler; my_like_range_simple
+     and my_instr_mb are shared with that table. */
+ my_strnncoll_ucs2_bin,
+ my_strnncoll_ucs2_bin,
+ my_strnxfrm_ucs2_bin,
+ my_like_range_simple,
+ my_wildcmp_ucs2_bin, /* my_wildcmp_ucs2() with weights == NULL */
+ my_strcasecmp_ucs2_bin,
+ my_instr_mb,
+ my_hash_sort_ucs2_bin
+};
+
+static MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
my_ismbchar_ucs2, /* ismbchar */
my_mbcharlen_ucs2, /* mbcharlen */
@@ -1077,8 +1324,8 @@ CHARSET_INFO my_charset_ucs2_general_ci=
1, /* strxfrm_multiply */
2, /* mbmaxlen */
0,
- &my_charset_handler,
- &my_collation_ci_handler
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_general_ci_handler
};
@@ -1100,8 +1347,8 @@ CHARSET_INFO my_charset_ucs2_bin=
1, /* strxfrm_multiply */
2, /* mbmaxlen */
0,
- &my_charset_handler,
- &my_collation_mb_bin_handler
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_bin_handler
};