summaryrefslogtreecommitdiff
path: root/mysql-test/include/ctype_utf8_ilseq.inc
blob: c400731c07fe3b3c9b65fbbe2e97ba5f03009a2e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#
# Compare a field to an utf8 string literal with illegal byte sequences
#

--echo #
--echo # Start of ctype_utf8_ilseq.inc
--echo #

--eval CREATE TABLE t1 ENGINE=$ENGINE AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;
ALTER TABLE t1
  ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  ADD KEY(ch);
SHOW CREATE TABLE t1;

INSERT INTO t1 (ch) VALUES ('admin'),('admin1');
SELECT ch FROM t1 WHERE ch='admin๐Œ†';
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='admin๐Œ†';
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='admin๐Œ†';
DELETE FROM t1;
INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
INSERT INTO t1 (ch) VALUES ('z');
# LATIN SMALL LETTER A + LATIN CAPITAL LETTER E WITH GRAVE
INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
# LATIN SMALL LETTER A + ARMENIAN SMALL LETTER REH
INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);

SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a๐Œ†' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a๐Œ†b' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a๐Œ†' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a๐Œ†b' ORDER BY ch;

SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a๐Œ†' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a๐Œ†b' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a๐Œ†' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a๐Œ†b' ORDER BY ch;

SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a๐Œ†' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a๐Œ†b' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a๐Œ†' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a๐Œ†b' ORDER BY ch;

ALTER TABLE t1 DROP KEY ch;

--echo # 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
PREPARE stmt FROM @query;
EXECUTE stmt;

#
# Non-equality comparison currently work differently depending on collation:
#
# - utf8_general_ci falls back to memcmp() on bad byte
# - utf8_unicode_ci treats bad bytes greater than any valid character
#
#  For example, these two characters:
#    _utf8 0xD080 (U+00C8 LATIN CAPITAL LETTER E WITH GRAVE)
#    _utf8 0xD680 (U+0580 ARMENIAN SMALL LETTER REH)
#
#  will give different results (depending on collation) when compared 
#  to an incomplete byte sequence 0xD1 (mb2head not followed by mb2tail).
#
# For utf8_general_ci the result depends on the valid side:
# - 0xD080 is smaller than 0xD1, because 0xD0 < 0xD1
# - 0xD680 is greater than 0xD1, because 0xD6 > 0xD1
#
# For utf8_unicode_ci the result does not depend on the valid side:
# - 0xD080 is smaller than 0xD1, because 0xD1 is greater than any valid character
# - 0xD680 is smaller than 0xD1, because 0xD1 is greater than any valid character
#
# utf8_general_ci should be eventually fixed to treat bad bytes greater
# than any valid character, similar to utf8_unicode_ci.
#

SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;

--echo # 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;

--echo # 0x8F is a bad byte sequence (an mb2tail without mb2head)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;

--echo # 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;

DROP TABLE t1;

--echo #
--echo # End of ctype_utf8_ilseq.inc
--echo #