1 files changed, 31 insertions, 40 deletions
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 88bab1fac76..b96ca0e5bbe 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2404,46 +2404,33 @@ static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)),
 static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)),
                         my_wc_t wc, uchar *r, uchar *e)
 {
-  int count;
-
-  if (r >= e)
-    return MY_CS_TOOSMALL;
-
   if (wc < 0x80)
-    count = 1;
-  else if (wc < 0x800)
-    count = 2;
-  else if (wc < 0x10000)
-    count = 3;
-#ifdef UNICODE_32BIT
-  else if (wc < 0x200000)
-    count = 4;
-  else if (wc < 0x4000000)
-    count = 5;
-  else if (wc <= 0x7fffffff)
-    count = 6;
-#endif
-    else return MY_CS_ILUNI;
-
-  /*
-    e is a character after the string r, not the last character of it.
-    Because of it (r+count > e), not (r+count-1 >e )
-   */
-  if ( r+count > e )
-    return MY_CS_TOOSMALLN(count);
-
-  switch (count) {
-    /* Fall through all cases!!! */
-#ifdef UNICODE_32BIT
-    case 6: r[5] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x4000000;
-    case 5: r[4] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x200000;
-    case 4: r[3] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x10000;
-#endif
-    case 3: r[2] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x800;
-    case 2: r[1] = (uchar) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0xc0;
-    case 1: r[0] = (uchar) wc;
+  {
+    if (r >= e)
+      return MY_CS_TOOSMALL;
+    *r= (uchar) wc;
+    return 1;
   }
-  return count;
+  if (wc < 0x800)
+  {
+    if (r + 2 > e)
+      return MY_CS_TOOSMALLN(2);
+    /* U+0080..U+07FF:  00000xxx.xxyyyyyy -> 110xxxxx 10yyyyyy */
+    *r++= (uchar) (0xC0 | (wc >> 6));
+    *r=   (uchar) (0x80 | (wc & 0x3F));
+    return 2;
+  }
+  if (wc < 0x10000)
+  {
+    if (r + 3 > e)
+      return MY_CS_TOOSMALLN(3);
+    /* U+0800..U+FFFF: xxxxyyyy.yyzzzzzz  -> 1110xxxx 10yyyyyy 10zzzzzz */
+    *r++= (uchar) (0xE0 | (wc >> 12));
+    *r++= (uchar) (0x80 | ((wc >> 6) & 0x3f));
+    *r=   (uchar) (0x80 | (wc & 0x3f));
+    return 3;
+  }
+  return MY_CS_ILUNI;
 }
 
 
@@ -4353,6 +4340,10 @@ static const char filename_safe_char[128]=
 
 #define MY_FILENAME_ESCAPE '@'
 
+/*
+  note, that we cannot trust 'e' here, it's may be fake,
+  see strconvert()
+*/
 static int
 my_mb_wc_filename(CHARSET_INFO *cs __attribute__((unused)),
                   my_wc_t *pwc, const uchar *s, const uchar *e)
@@ -4374,7 +4365,7 @@ my_mb_wc_filename(CHARSET_INFO *cs __attribute__((unused)),
     return MY_CS_TOOSMALL3;
   
   byte1= s[1];
-  byte2= s[2];
+  byte2= byte1 ? s[2] : 0;
   
   if (byte1 >= 0x30 && byte1 <= 0x7F &&
       byte2 >= 0x30 && byte2 <= 0x7F)
@@ -4399,7 +4390,7 @@ my_mb_wc_filename(CHARSET_INFO *cs __attribute__((unused)),
       (byte2= hexlo(byte2)) >= 0)
   {
     int byte3= hexlo(s[3]);
-    int byte4= hexlo(s[4]);
+    int byte4= hexlo(s[3] ? s[4] : 0);
     if (byte3 >=0 && byte4 >=0)
     {
       *pwc= (byte1 << 12) + (byte2 << 8) + (byte3 << 4) + byte4;