summaryrefslogtreecommitdiff
path: root/mysql-test/suite/innodb/t/innodb_ctype_ldml.test
blob: 26af717b15a1587686e0662370f1a659570fefaa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
--source include/have_innodb.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc

--disable_warnings
drop table if exists t1;
--enable_warnings

# Index.xml has some intentional errors in collation definitions.
# They are reported to the server error log. Let's suppress them.
--disable_query_log
call mtr.add_suppression("Syntax error at '\\[strength tertiary\\]'");
call mtr.add_suppression("Can't reset before a primary ignorable character U\\+A48C");
--enable_query_log

--echo In the following tests we change the order of letter "b"
--echo making it equal to letter "a", and check that it works
--echo with all Unicode character sets
set names utf8;

--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
show variables like 'character_sets_dir%';

show collation like 'utf8_phone_ci';
CREATE TABLE t1 (
 name VARCHAR(64),
 phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci
);
INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02');
INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04');
INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01');
INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03');
INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005');
SELECT * FROM t1 ORDER BY phone;
SELECT * FROM t1 WHERE phone='+7(912)800-80-01';
SELECT * FROM t1 WHERE phone='79128008001';
SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1';
DROP TABLE t1;

show collation like 'utf8_test_ci';
create table t1 (c1 char(1) character set utf8 collate utf8_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;

show collation like 'ucs2_test_ci';
create table t1 (c1 char(1) character set ucs2 collate ucs2_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;

show collation like 'utf8mb4_test_ci';
create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;

show collation like 'utf16_test_ci';
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;

show collation like 'utf32_test_ci';
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;

###
### TODO: activate this when utf8_unicode_520_ci is merged
###
### make sure utf8_test_ci is Unicode-5.0.0
##SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_utf32 0x10002 using utf8mb4) collate utf8mb4_test_ci));
##SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a));
##SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a));
##SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a));
##SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a));

### check that it works with supplementary characters
##SELECT hex(weight_string(convert(_utf32 0x61 using utf8mb4) collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_utf32 0x62 using utf8mb4) collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_utf32 0x10062 using utf8mb4) collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_utf32 0x100400 using utf8mb4) collate utf8mb4_test_ci));

### check contractions with non-ascii characters
##SELECT hex(weight_string(_utf8mb4 0x64 collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_ucs2 0x0064017e using utf8mb4) collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_ucs2 0x0044017e using utf8mb4) collate utf8mb4_test_ci));
##SELECT hex(weight_string(convert(_ucs2 0x0044017d using utf8mb4) collate utf8mb4_test_ci));


--echo #
--echo # Bug#45645 Mysql server close all connection and restart using lower function
--echo #
CREATE TABLE t1 (a VARCHAR(10)) CHARACTER SET utf8 COLLATE utf8_test_ci;
INSERT INTO t1 (a) VALUES ('hello!');
SELECT * FROM t1 WHERE LOWER(a)=LOWER('N');
DROP TABLE t1;

--echo #
--echo # Bug#51976 LDML collations issue (cyrillic example)
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci);
INSERT INTO t1 (a) VALUES ('Hello');
SELECT a, UPPER(a), LOWER(a) FROM t1;
DROP TABLE t1;

--echo #
--echo # Bug#43827 Server closes connections and restarts
--echo #
# Crash happened with a user-defined utf8 collation,
# on attempt to insert a string longer than the column can store.
CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_test_ci);
INSERT IGNORE INTO t1 SELECT REPEAT('a',11);
DROP TABLE t1;

#
#  Vietnamese experimental collation
#
--echo  Vietnamese experimental collation

show collation like 'ucs2_vn_ci';
create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci);
insert into t1 values (0x0061),(0x0041),(0x00E0),(0x00C0),(0x1EA3),(0x1EA2),
                      (0x00E3),(0x00C3),(0x00E1),(0x00C1),(0x1EA1),(0x1EA0);
insert into t1 values (0x0103),(0x0102),(0x1EB1),(0x1EB0),(0x1EB3),(0x1EB2),
                      (0x1EB5),(0x1EB4),(0x1EAF),(0x1EAE),(0x1EB7),(0x1EB6);
insert into t1 values (0x00E2),(0x00C2),(0x1EA7),(0x1EA6),(0x1EA9),(0x1EA8),
                      (0x1EAB),(0x1EAA),(0x1EA5),(0x1EA4),(0x1EAD),(0x1EAC);
insert into t1 values ('b'),('B'),('c'),('C');
insert into t1 values ('d'),('D'),(0x0111),(0x0110);
insert into t1 values (0x0065),(0x0045),(0x00E8),(0x00C8),(0x1EBB),(0x1EBA),
                      (0x1EBD),(0x1EBC),(0x00E9),(0x00C9),(0x1EB9),(0x1EB8);
insert into t1 values (0x00EA),(0x00CA),(0x1EC1),(0x1EC0),(0x1EC3),(0x1EC2),
                      (0x1EC5),(0x1EC4),(0x1EBF),(0x1EBE),(0x1EC7),(0x1EC6);
insert into t1 values ('g'),('G'),('h'),('H');
insert into t1 values (0x0069),(0x0049),(0x00EC),(0x00CC),(0x1EC9),(0x1EC8),
                      (0x0129),(0x0128),(0x00ED),(0x00CD),(0x1ECB),(0x1ECA);
insert into t1 values ('k'),('K'),('l'),('L'),('m'),('M');
insert into t1 values (0x006F),(0x004F),(0x00F2),(0x00D2),(0x1ECF),(0x1ECE),
                      (0x00F5),(0x00D5),(0x00F3),(0x00D3),(0x1ECD),(0x1ECC);
insert into t1 values (0x00F4),(0x00D4),(0x1ED3),(0x1ED2),(0x1ED5),(0x1ED4),
                      (0x1ED7),(0x1ED6),(0x1ED1),(0x1ED0),(0x1ED9),(0x1ED8);
insert into t1 values (0x01A1),(0x01A0),(0x1EDD),(0x1EDC),(0x1EDF),(0x1EDE),
                      (0x1EE1),(0x1EE0),(0x1EDB),(0x1EDA),(0x1EE3),(0x1EE2);
insert into t1 values ('p'),('P'),('q'),('Q'),('r'),('R'),('s'),('S'),('t'),('T');
insert into t1 values (0x0075),(0x0055),(0x00F9),(0x00D9),(0x1EE7),(0x1EE6),
                      (0x0169),(0x0168),(0x00FA),(0x00DA),(0x1EE5),(0x1EE4);
insert into t1 values (0x01B0),(0x01AF),(0x1EEB),(0x1EEA),(0x1EED),(0x1EEC),
                      (0x1EEF),(0x1EEE),(0x1EE9),(0x1EE8),(0x1EF1),(0x1EF0);
insert into t1 values ('v'),('V'),('x'),('X');
insert into t1 values (0x0079),(0x0059),(0x1EF3),(0x1EF2),(0x1EF7),(0x1EF6),
                      (0x1EF9),(0x1EF8),(0x00FD),(0x00DD),(0x1EF5),(0x1EF4);
select hex(c1) as h, c1 from t1 order by c1, h;
select group_concat(hex(c1) order by hex(c1)) from t1 group by c1;
select group_concat(c1 order by hex(c1) SEPARATOR '') from t1 group by c1;
drop table t1;

--echo Bug#46448 trailing spaces are not ignored when user collation maps space != 0x20
set names latin1;
show collation like 'latin1_test';
select "foo" = "foo " collate latin1_test;

-- echo The following tests check that two-byte collation IDs work
# The file ../std-data/Index.xml has a number of collations with high IDs.

# Test that the "ID" column in I_S and SHOW queries can handle two bytes
select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
show collation like '%test%';

# Test that two-byte collation ID is correctly transfered to the client side.
show collation like 'ucs2_vn_ci';
create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci);

show create table t1;

insert into t1 values (0x0061);
--enable_metadata
set @@character_set_results=NULL;
select * from t1;
--disable_metadata
drop table t1;

#
# Check maximum collation ID (2047 as of MySQL-6.0.9)
#
CREATE TABLE t1 (s1 char(10) character set utf8 collate utf8_maxuserid_ci);
INSERT INTO t1 VALUES ('a'),('b');
SELECT * FROM t1 WHERE s1='a' ORDER BY BINARY s1;
DROP TABLE t1;


#
# Bug#47756 Setting 2byte collation ID with 'set names' crashes the server
#
SET NAMES utf8 COLLATE utf8_phone_ci;
SHOW COLLATION LIKE 'utf8_phone_ci';
SET NAMES utf8;

# make sure utf8mb4_test_400_ci is Unicode-4.0.0 based
SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_400_ci));
SELECT hex(weight_string(convert(_utf32 0x10002 using utf8mb4) collate utf8mb4_test_400_ci));
SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a));
SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a));
SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a));
SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a));

--echo #
--echo # WL#5624 Collation customization improvements
--echo #
SET NAMES utf8 COLLATE utf8_5624_1;
CREATE TABLE t1 AS SELECT REPEAT(' ', 16) AS a LIMIT 0;
# Part 1,2,3: long contractions and expansions
# Part 7: Quarternary difference
INSERT INTO t1 VALUES ('012345'),('001234'),('000123'),('000012'),('000001');
INSERT INTO t1 VALUES ('12345'),('01234'),('00123'),('00012'),('00001');
INSERT INTO t1 VALUES ('1234'),('0123'),('0012'),('0001');
INSERT INTO t1 VALUES ('123'),('012'),('001');
INSERT INTO t1 VALUES ('12'),('01');
INSERT INTO t1 VALUES ('1'),('9');
INSERT INTO t1 VALUES ('ГАИ'),('ГИБДД');
# Part 4: reset before
# Part 6: characters rather than escape sequences
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e');
INSERT INTO t1 VALUES ('cz'),('Ċ'),('ċ');
INSERT INTO t1 VALUES ('f'),('fz'),('g'),('Ġ'),('ġ');
INSERT INTO t1 VALUES ('h'),('hz'),('GĦ'),('Għ'),('gĦ'),('għ');
INSERT INTO t1 VALUES ('i'),('iz'),('Ħ'),('ħ');
INSERT INTO t1 VALUES ('y'),('yz'),('z'),('Ż'),('ż');
INSERT INTO t1 VALUES ('ā'),('Ā'),('á'),('Á'),('à'),('À');
INSERT INTO t1 VALUES ('ē'),('é'),('ě'),('ê'),('Ē'),('É'),('Ě'),('Ê');
# Part 8: Abbreviated shift syntax
INSERT INTO t1 VALUES ('a'),('~'),('!'),('@'),('#'),('$'),('%'),('^');
INSERT INTO t1 VALUES ('('),(')'),('-'),('+'),('|'),('='),(':'),(';');
INSERT INTO t1 VALUES ('"'),('\''),('?');
# Part 9: Normal expansion syntax
INSERT INTO t1 VALUES ('ch'),('k'),('cs'),('ccs'),('cscs');
# Part 10: Previous context
INSERT INTO t1 VALUES ('aa-'),('ab-'),('ac-'),('ad-'),('ae-'),('af-'),('az-');
# Part 12: Logical reset positions
INSERT INTO t1 VALUES ('lp-fni'),('lp-lni');
INSERT INTO t1 VALUES ('lp-fpi'),('lp-lpi');
INSERT INTO t1 VALUES ('lp-fsi'),('lp-lsi');
INSERT INTO t1 VALUES ('lp-fti'),('lp-lti');
INSERT INTO t1 VALUES ('lp-ft'),('lp-lt');
INSERT INTO t1 VALUES ('lp-fv'),('lp-lv');
# Logical positions with reset before
INSERT INTO t1 VALUES ('lb-fni'),('lb-lni');
INSERT INTO t1 VALUES ('lb-fv'),('lb-lv');
# Part 5: Long tailoring
INSERT INTO t1 VALUES (_ucs2 0x3106),(_ucs2 0x3110), (_ucs2 0x3111), (_ucs2 0x3112);
INSERT INTO t1 VALUES (_ucs2 0x32A3), (_ucs2 0x3231);
INSERT INTO t1 VALUES (_ucs2 0x84D9), (_ucs2 0x98F5), (_ucs2 0x7CF3), (_ucs2 0x5497);
SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY a;
--echo #
--echo # WL#5624, the same test with UCS2
--echo #
ALTER TABLE t1 CONVERT TO CHARACTER SET ucs2 COLLATE ucs2_5624_1;
SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
DROP TABLE t1;

--echo #
--echo # WL#5624, unsupported features
--echo #
# Part 13: More verbosity
--error ER_UNKNOWN_COLLATION
SET NAMES utf8 COLLATE utf8_5624_2;
SHOW WARNINGS;

--echo #
--echo # WL#5624, reset before primary ignorable
--echo #
--error ER_UNKNOWN_COLLATION
SET NAMES utf8 COLLATE utf8_5624_3;
SHOW WARNINGS;

--echo #
--echo # WL#5624, \u without hex digits is equal to {'\\', 'u'}
--echo #
SET NAMES utf8 COLLATE utf8_5624_4;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES ('\\'),('u'),('x'),('X');
SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
DROP TABLE t1;

--echo #
--echo # WL#5624, testing Bengali collations
--echo #
SET NAMES utf8, collation_connection=utf8_bengali_standard_ci;
CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES (_ucs2 0x09FA), (_ucs2 0x09F8), (_ucs2 0x09F9), (_ucs2 0x09F2);
INSERT INTO t1 VALUES (_ucs2 0x09DC), (_ucs2 0x09A109BC);
INSERT INTO t1 VALUES (_ucs2 0x09A2), (_ucs2 0x09DD), (_ucs2 0x09A209BC);
INSERT INTO t1 VALUES (_ucs2 0x09A3);
SELECT HEX(WEIGHT_STRING(a)), HEX(CONVERT(a USING ucs2)), HEX(a)
FROM t1 ORDER BY a, BINARY a;
DROP TABLE t1;

SET NAMES utf8, collation_connection=utf8_bengali_traditional_ci;
CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES
(_ucs2 0x0985),(_ucs2 0x0986),(_ucs2 0x0987),(_ucs2 0x0988),
(_ucs2 0x0989),(_ucs2 0x098A),(_ucs2 0x098B),(_ucs2 0x09E0),
(_ucs2 0x098C),(_ucs2 0x09E1),(_ucs2 0x098F),(_ucs2 0x0990),
(_ucs2 0x0993);

INSERT INTO t1 VALUES
(_ucs2 0x0994),(_ucs2 0x0982),(_ucs2 0x0983),(_ucs2 0x0981),
(_ucs2 0x099509CD), (_ucs2 0x099609CD), (_ucs2 0x099709CD), (_ucs2 0x099809CD),
(_ucs2 0x099909CD), (_ucs2 0x099A09CD), (_ucs2 0x099B09CD), (_ucs2 0x099C09CD),
(_ucs2 0x099D09CD), (_ucs2 0x099E09CD), (_ucs2 0x099F09CD), (_ucs2 0x09A009CD),
(_ucs2 0x09A109CD), (_ucs2 0x09A209CD), (_ucs2 0x09A309CD),
(_ucs2 0x09CE), (_ucs2 0x09A409CD200D), (_ucs2 0x09A409CD),
(_ucs2 0x09A509CD),(_ucs2 0x09A609CD),
(_ucs2 0x09A709CD), (_ucs2 0x09A809CD), (_ucs2 0x09AA09CD), (_ucs2 0x09AB09CD),
(_ucs2 0x09AC09CD), (_ucs2 0x09AD09CD), (_ucs2 0x09AE09CD), (_ucs2 0x09AF09CD),
(_ucs2 0x09B009CD), (_ucs2 0x09F009CD), (_ucs2 0x09B209CD), (_ucs2 0x09F109CD),
(_ucs2 0x09B609CD), (_ucs2 0x09B709CD), (_ucs2 0x09B809CD), (_ucs2 0x09B909CD);

INSERT INTO t1 VALUES 
 (_ucs2 0x099509CD0985),(_ucs2 0x0995),
 (_ucs2 0x099509CD0986),(_ucs2 0x099509BE),
 (_ucs2 0x099509CD0987),(_ucs2 0x099509BF),
 (_ucs2 0x099509CD0988),(_ucs2 0x099509C0),
 (_ucs2 0x099509CD0989),(_ucs2 0x099509C1),
 (_ucs2 0x099509CD098A),(_ucs2 0x099509C2),
 (_ucs2 0x099509CD098B),(_ucs2 0x099509C3),
 (_ucs2 0x099509CD09E0),(_ucs2 0x099509C4),
 (_ucs2 0x099509CD098C),(_ucs2 0x099509E2),
 (_ucs2 0x099509CD09E1),(_ucs2 0x099509E3),
 (_ucs2 0x099509CD098F),(_ucs2 0x099509C7),
 (_ucs2 0x099509CD0990),(_ucs2 0x099509C8),
 (_ucs2 0x099509CD0993),(_ucs2 0x099509CB),
 (_ucs2 0x099509CD0994),(_ucs2 0x099509CC);

SELECT HEX(WEIGHT_STRING(a)), HEX(CONVERT(a USING ucs2)), HEX(a)
FROM t1 ORDER BY a, BINARY(a);
SELECT HEX(WEIGHT_STRING(a)) as wa,
GROUP_CONCAT(HEX(CONVERT(a USING ucs2)) ORDER BY LENGTH(a), BINARY a)
FROM t1 GROUP BY a ORDER BY a;
DROP TABLE t1;

--echo #
--echo # WL#5624, shift after, using expansion
--echo #
SET NAMES utf8 COLLATE utf8_5624_5;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES ('0'),('1'),('0z'),(_ucs2 0x0030FF9D);
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e'),('f'),('g'),('h'),('i');
INSERT INTO t1 VALUES ('j'),('k'),('l'),('m'),('n'),('o'),('p'),('q'),('r');
INSERT INTO t1 VALUES ('s'),('t'),('u'),('v'),('w'),('x'),('y'),('z');
INSERT INTO t1 VALUES ('aa'),('aaa');
INSERT INTO t1 VALUES ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H'),('I');
INSERT INTO t1 VALUES ('J'),('K'),('L'),('M'),('N'),('O'),('P'),('Q'),('R');
INSERT INTO t1 VALUES ('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z');
INSERT INTO t1 VALUES ('AA'),('AAA');

SELECT a, HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
DROP TABLE t1;


--echo #
--echo # End of WL#5624
--echo #