From a464e01713d19d358eb7feab2cfdf88bb274d5df Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 12 Dec 2005 21:42:09 +0400 Subject: Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode. m_ctype.h: Reorganizing mb_wc return codes to be able to return "an unassigned N-byte-long character". sql_string.cc: Adding code to detect and properly handle unassigned characters (i.e. those characters which are correctly formed according to the character specifications, but don't have a Unicode mapping). Many files: Fixing conversion function to return new codes. ctype_ujis.test, ctype_gbk.test, ctype_big5.test: Adding a test case. ctype_ujis.result, ctype_gbk.result, ctype_big5.result: Fixing results accordingly. include/m_ctype.h: Reorganizing mb_wc return codes to be able to return "an unassigned N-byte-long character". Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode. mysql-test/r/ctype_big5.result: Fixing results accordingly. mysql-test/r/ctype_gbk.result: Fixing results accordingly. mysql-test/r/ctype_ujis.result: Fixing results accordingly. mysql-test/t/ctype_big5.test: Adding a test case. mysql-test/t/ctype_gbk.test: Adding a test case. mysql-test/t/ctype_ujis.test: Adding a test case. sql/sql_string.cc: Adding code to detect and properly handle unassigned characters (i.e. those characters which are correctly formed according to the character specifications, but don't have a Unicode mapping). strings/ctype-big5.c: Fixing conversion function to return new codes. strings/ctype-bin.c: Fixing conversion function to return new codes. strings/ctype-cp932.c: Fixing conversion function to return new codes. strings/ctype-euc_kr.c: Fixing conversion function to return new codes. strings/ctype-gb2312.c: Fixing conversion function to return new codes. strings/ctype-gbk.c: Fixing conversion function to return new codes. strings/ctype-latin1.c: Fixing conversion function to return new codes. 
strings/ctype-simple.c: Fixing conversion function to return new codes. strings/ctype-sjis.c: Fixing conversion function to return new codes. strings/ctype-tis620.c: Fixing conversion function to return new codes. strings/ctype-ucs2.c: Fixing conversion function to return new codes. strings/ctype-ujis.c: Fixing conversion function to return new codes. strings/ctype-utf8.c: Fixing conversion function to return new codes. --- strings/ctype-utf8.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'strings/ctype-utf8.c') diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index b3e78ce27e9..94e8e6ba797 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1765,7 +1765,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), unsigned char c; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; c= s[0]; if (c < 0x80) @@ -1778,7 +1778,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), else if (c < 0xe0) { if (s+2 > e) /* We need 2 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!((s[1] ^ 0x80) < 0x40)) return MY_CS_ILSEQ; @@ -1789,7 +1789,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), else if (c < 0xf0) { if (s+3 > e) /* We need 3 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL3; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0))) return MY_CS_ILSEQ; @@ -1804,7 +1804,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32) { if (s+4 > e) /* We need 4 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL4; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && @@ -1822,7 +1822,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32) { if (s+5 >e) /* We need 5 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL5; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 
0x40 && @@ -1841,7 +1841,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32) { if ( s+6 >e ) /* We need 6 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL6; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && @@ -1892,7 +1892,7 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) , Because of it (r+count > e), not (r+count-1 >e ) */ if ( r+count > e ) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALLN(count); switch (count) { /* Fall through all cases!!! */ -- cgit v1.2.1