From a464e01713d19d358eb7feab2cfdf88bb274d5df Mon Sep 17 00:00:00 2001
From: unknown <bar@mysql.com>
Date: Mon, 12 Dec 2005 21:42:09 +0400
Subject: Bug#15375 Unassigned multibyte codes are broken into parts when
 converting to Unicode. m_ctype.h: Reorganizing mb_wc return codes to be
 able to return "an unassigned N-byte-long character". sql_string.cc:
 Adding code to detect and properly handle unassigned characters (i.e.
 those characters which are correctly formed according to the character
 specifications, but don't have a Unicode mapping). Many files: Fixing
 conversion function to return new codes. ctype_ujis.test, ctype_gbk.test,
 ctype_big5.test: Adding a test case. ctype_ujis.result, ctype_gbk.result,
 ctype_big5.result: Fixing results accordingly.

include/m_ctype.h:
  Reorganizing mb_wc return codes to be able
  to return "an unassigned N-byte long character".
  Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode.
mysql-test/r/ctype_big5.result:
  Fixing results accordingly.
mysql-test/r/ctype_gbk.result:
  Fixing results accordingly.
mysql-test/r/ctype_ujis.result:
  Fixing results accordingly.
mysql-test/t/ctype_big5.test:
  Adding a test case.
mysql-test/t/ctype_gbk.test:
  Adding a test case.
mysql-test/t/ctype_ujis.test:
  Adding a test case.
sql/sql_string.cc:
  Adding code to detect and properly handle
  unassigned characters (i.e. those characters
  which are correctly formed according to the
  character specifications, but don't have a Unicode
  mapping).
strings/ctype-big5.c:
  Fixing conversion function to return new codes.
strings/ctype-bin.c:
  Fixing conversion function to return new codes.
strings/ctype-cp932.c:
  Fixing conversion function to return new codes.
strings/ctype-euc_kr.c:
  Fixing conversion function to return new codes.
strings/ctype-gb2312.c:
  Fixing conversion function to return new codes.
strings/ctype-gbk.c:
  Fixing conversion function to return new codes.
strings/ctype-latin1.c:
  Fixing conversion function to return new codes.
strings/ctype-simple.c:
  Fixing conversion function to return new codes.
strings/ctype-sjis.c:
  Fixing conversion function to return new codes.
strings/ctype-tis620.c:
  Fixing conversion function to return new codes.
strings/ctype-ucs2.c:
  Fixing conversion function to return new codes.
strings/ctype-ujis.c:
  Fixing conversion function to return new codes.
strings/ctype-utf8.c:
  Fixing conversion function to return new codes.
---
 strings/ctype-utf8.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'strings/ctype-utf8.c')

diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index b3e78ce27e9..94e8e6ba797 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1765,7 +1765,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   unsigned char c;
 
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
 
   c= s[0];
   if (c < 0x80)
@@ -1778,7 +1778,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xe0)
   {
     if (s+2 > e) /* We need 2 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL2;
 
     if (!((s[1] ^ 0x80) < 0x40))
       return MY_CS_ILSEQ;
@@ -1789,7 +1789,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xf0)
   {
     if (s+3 > e) /* We need 3 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL3;
 
     if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0)))
       return MY_CS_ILSEQ;
@@ -1804,7 +1804,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
   {
     if (s+4 > e) /* We need 4 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL4;
 
     if (!((s[1] ^ 0x80) < 0x40 &&
           (s[2] ^ 0x80) < 0x40 &&
@@ -1822,7 +1822,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
    else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
   {
     if (s+5 >e) /* We need 5 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL5;
 
     if (!((s[1] ^ 0x80) < 0x40 &&
           (s[2] ^ 0x80) < 0x40 &&
@@ -1841,7 +1841,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
   else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
   {
     if ( s+6 >e ) /* We need 6 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL6;
 
     if (!((s[1] ^ 0x80) < 0x40   &&
           (s[2] ^ 0x80) < 0x40   &&
@@ -1892,7 +1892,7 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
     Because of it (r+count > e), not (r+count-1 >e )
    */
   if ( r+count > e )
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALLN(count);
 
   switch (count) {
     /* Fall through all cases!!! */
-- 
cgit v1.2.1