From 7f6b648d7db895c09a6ace3b5ee62a5fb482a8e0 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 17 Feb 2023 16:20:01 +0400 Subject: MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 String length growth during upper/lower conversion in Unicode collations depends only on the underlying MY_UNICASE_INFO used in the collation. Maintaining a separate member CHARSET_INFO::caseup_multiply and CHARSET_INFO::casedn_multiply duplicated this information and caused bugs like this (when MY_UNICASE_INFO and case??_multiply when out of sync because of incomplete CHARSET_INFO initialization). Fix: Changing CHARSET_INFO::caseup_multiply and CHARSET_INFO::casedn_multiply from members to virtual functions. The virtual functions in Unicode collations calculate case conversion growth factors from the MY_UNICASE_INFO. This guarantees that the growth factors are always in sync with the MY_UNICASE_INFO. --- mysql-test/main/ctype_ldml.result | 56 ++++++++++++ mysql-test/main/ctype_ldml.test | 7 ++ mysql-test/main/ctype_utf8_uca.result | 145 +++++++++++++++++++++++++++++++ mysql-test/main/ctype_utf8_uca.test | 19 ++++ mysql-test/main/ctype_utf8mb4_uca.result | 139 +++++++++++++++++++++++++++++ mysql-test/main/ctype_utf8mb4_uca.test | 10 +++ 6 files changed, 376 insertions(+) (limited to 'mysql-test/main') diff --git a/mysql-test/main/ctype_ldml.result b/mysql-test/main/ctype_ldml.result index efafa9dc041..a23e835d1fa 100644 --- a/mysql-test/main/ctype_ldml.result +++ b/mysql-test/main/ctype_ldml.result @@ -3068,6 +3068,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -3081,6 +3087,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; # # End of 10.3 tests @@ -3230,5 +3240,51 @@ is_520 0 is_1400 1 DROP TABLE t1; # +# MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +# +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +# # End of 10.10 tests # diff --git a/mysql-test/main/ctype_ldml.test b/mysql-test/main/ctype_ldml.test index 5ee56c3b950..6336a1d0f5f 100644 --- a/mysql-test/main/ctype_ldml.test +++ b/mysql-test/main/ctype_ldml.test @@ -697,6 +697,13 @@ ORDER BY a, HEX(a); --horizontal_results DROP TABLE t1; +--echo # +--echo # MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +--echo # + +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci; +--source include/ctype_casefolding.inc + --echo # --echo # End of 10.10 tests --echo # diff --git a/mysql-test/main/ctype_utf8_uca.result b/mysql-test/main/ctype_utf8_uca.result index e31acf8edc4..ecd2daa5272 100644 --- a/mysql-test/main/ctype_utf8_uca.result +++ b/mysql-test/main/ctype_utf8_uca.result @@ -612,6 +612,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -625,6 +631,53 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB C9AB ɫ 271 C9B1 C9B1 ɱ 27D C9BD C9BD ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +SET NAMES utf8mb3 COLLATE utf8mb3_turkish_ci /*Unicode-4.0 folding, with Turkish mapping for I */; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb3 COLLATE utf8mb3_turkish_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A C8BA C8BA Ⱥ +23E C8BE C8BE Ⱦ +23F C8BF C8BF ȿ +240 C980 C980 ɀ +250 C990 C990 ɐ +251 C991 C991 ɑ +252 C992 C992 ɒ +26B C9AB C9AB ɫ +271 C9B1 C9B1 ɱ +27D C9BD C9BD ɽ +49 C4B1 49 I +69 69 C4B0 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -645,6 +698,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -658,6 +717,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_nopad_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -678,6 +741,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -691,6 +760,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_myanmar_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -711,6 +784,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -724,6 +803,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -744,6 +827,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -757,6 +846,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; # # End of 10.3 tests @@ -1533,3 +1626,55 @@ SET NAMES utf8mb3; # # End of 10.10 tests # +# +# Start of 10.10 tests +# +# +# MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +# +SET NAMES utf8mb3 COLLATE utf8mb3_uca1400_ai_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb3 COLLATE utf8mb3_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +# +# End of 10.10 tests +# diff --git a/mysql-test/main/ctype_utf8_uca.test b/mysql-test/main/ctype_utf8_uca.test index 946649417ae..5abf9192511 100644 --- a/mysql-test/main/ctype_utf8_uca.test +++ b/mysql-test/main/ctype_utf8_uca.test @@ -34,6 +34,9 @@ SET NAMES utf8 COLLATE utf8_unicode_nopad_ci; SET NAMES utf8mb3 COLLATE utf8mb3_unicode_ci /*Unicode-4.0 folding*/; --source include/ctype_casefolding.inc +SET NAMES utf8mb3 COLLATE utf8mb3_turkish_ci /*Unicode-4.0 folding, with Turkish mapping for I */; +--source include/ctype_casefolding.inc + SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_ci; --source include/ctype_casefolding.inc @@ -79,3 +82,19 @@ SET NAMES utf8mb3; --echo # --echo # End of 10.10 tests --echo # + + +--echo # +--echo # Start of 10.10 tests +--echo # + +--echo # +--echo # MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +--echo # + +SET NAMES utf8mb3 COLLATE utf8mb3_uca1400_ai_ci; +--source include/ctype_casefolding.inc + +--echo # +--echo # End of 10.10 tests +--echo # diff --git a/mysql-test/main/ctype_utf8mb4_uca.result b/mysql-test/main/ctype_utf8mb4_uca.result index 195c47e0360..42de45c98a3 100644 --- a/mysql-test/main/ctype_utf8mb4_uca.result +++ b/mysql-test/main/ctype_utf8mb4_uca.result @@ -6630,6 +6630,55 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A C8BA C8BA Ⱥ +23E C8BE C8BE Ⱦ +23F C8BF C8BF ȿ +240 C980 C980 ɀ +250 C990 C990 ɐ +251 C991 C991 ɑ +252 C992 C992 ɒ +26B C9AB C9AB ɫ +271 C9B1 C9B1 ɱ +27D C9BD C9BD ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +SET NAMES utf8mb4 COLLATE utf8mb4_turkish_ci /*Unicode-4.0 folding with Turkish mapping for I */; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_turkish_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6643,6 +6692,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB C9AB ɫ 271 C9B1 C9B1 ɱ 27D C9BD C9BD ɽ +49 C4B1 49 I +69 69 C4B0 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6663,6 +6716,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6676,6 +6735,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_nopad_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6696,6 +6759,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6709,6 +6778,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_myanmar_ci; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6729,6 +6802,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6742,6 +6821,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; SET NAMES utf8mb4 COLLATE utf8mb4_thai_520_w2; CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; @@ -6762,6 +6845,12 @@ INSERT INTO case_folding (code) VALUES (0x26B), (0x271), (0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; UPDATE case_folding SET c=CHAR(code USING ucs2); SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c @@ -6775,6 +6864,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c 26B C9AB E2B1A2 ɫ 271 C9B1 E2B1AE ɱ 27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı DROP TABLE case_folding; # # End of 10.3 tests @@ -11379,5 +11472,51 @@ DROP TABLE t1; DROP PROCEDURE exec_verbose; DROP PROCEDURE test_styles; # +# MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +# +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_ai_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +INSERT INTO case_folding (code) VALUES +(0x0049) /* LATIN CAPITAL LETTER I */, +(0x0069) /* LATIN SMALL LETTER I */, +(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */, +(0x0131) /* LATIN SMALL LETTER DOTLESS I */ +; +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +49 69 49 I +69 69 49 i +130 69 C4B0 İ +131 C4B1 49 ı +DROP TABLE case_folding; +# # End of 10.10 tests # diff --git a/mysql-test/main/ctype_utf8mb4_uca.test b/mysql-test/main/ctype_utf8mb4_uca.test index b4cfc910336..3f428447cc9 100644 --- a/mysql-test/main/ctype_utf8mb4_uca.test +++ b/mysql-test/main/ctype_utf8mb4_uca.test @@ -121,6 +121,9 @@ SET NAMES utf8mb4; SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci /*Unicode-4.0 folding*/; --source include/ctype_casefolding.inc +SET NAMES utf8mb4 COLLATE utf8mb4_turkish_ci /*Unicode-4.0 folding with Turkish mapping for I */; +--source include/ctype_casefolding.inc + SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_ci; --source include/ctype_casefolding.inc @@ -426,6 +429,13 @@ DROP TABLE t1; DROP PROCEDURE exec_verbose; DROP PROCEDURE test_styles; +--echo # +--echo # MDEV-30661 UPPER() returns an empty string for U+0251 in uca1400 collations for utf8 +--echo # + +SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_ai_ci; +--source include/ctype_casefolding.inc + --echo # --echo # End of 10.10 tests --echo # -- cgit v1.2.1