1 files changed, 141 insertions, 0 deletions
diff --git a/strings/ctype.c b/strings/ctype.c
index 6b6983ada60..adff69ad680 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -428,3 +428,144 @@ my_charset_is_ascii_compatible(CHARSET_INFO *cs)
   }
   return 1;
 }
+
+
+/*
+  Convert a string between two character sets, one character at a time.
+  'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+  @param  to[OUT]       Store result here
+  @param  to_length     Size of "to" buffer
+  @param  to_cs         Character set of result string
+  @param  from          Copy from here
+  @param  from_length   Length of the "from" string
+  @param  from_cs       Character set of the "from" string
+  @param  errors[OUT]   Number of conversion errors
+
+  @return Number of bytes copied to 'to' string
+*/
+
+static uint32
+my_convert_internal(char *to, uint32 to_length,
+                    const CHARSET_INFO *to_cs,
+                    const char *from, uint32 from_length,
+                    const CHARSET_INFO *from_cs, uint *errors)
+{
+  int         cnvres;                /* Result of the last mb_wc/wc_mb call */
+  my_wc_t     wc;                    /* Filled by mb_wc, passed to wc_mb */
+  const uchar *from_end= (const uchar*) from + from_length; /* End of input */
+  char *to_start= to;                /* Used to compute the returned length */
+  uchar *to_end= (uchar*) to + to_length;  /* One past the output buffer */
+  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;  /* Reads from "from" */
+  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;    /* Writes into "to" */
+  uint error_count= 0;               /* Incremented on every '?' fallback */
+
+  while (1)
+  {
+    if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
+      from+= cnvres;                 /* Consumed cnvres input bytes */
+    else if (cnvres == MY_CS_ILSEQ)  /* Skip one byte and emit '?' */
+    {
+      error_count++;
+      from++;
+      wc= '?';
+    }
+    else if (cnvres > MY_CS_TOOSMALL)
+    {
+      /*
+        A correct multibyte sequence detected, but it doesn't have
+        Unicode mapping. Skip its -cnvres bytes and emit '?' instead.
+      */
+      error_count++;
+      from+= (-cnvres);
+      wc= '?';
+    }
+    else
+      break;  // cnvres <= MY_CS_TOOSMALL: not enough characters
+
+outp:                                /* Re-entered below to store '?' */
+    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+      to+= cnvres;                   /* Produced cnvres output bytes */
+    else if (cnvres == MY_CS_ILUNI && wc != '?')
+    {
+      error_count++;
+      wc= '?';
+      goto outp;                     /* Retry once with '?' as replacement */
+    }
+    else
+      break;  /* Other wc_mb failure, or wc is already '?' */
+  }
+  *errors= error_count;
+  return (uint32) (to - to_start);
+}
+
+
+/*
+  Convert a string between two character sets.
+  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+  'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+  @param  to[OUT]       Store result here
+  @param  to_length     Size of "to" buffer
+  @param  to_cs         Character set of result string
+  @param  from          Copy from here
+  @param  from_length   Length of the "from" string
+  @param  from_cs       Character set of the "from" string
+  @param  errors[OUT]   Number of conversion errors
+
+  @return Number of bytes copied to 'to' string
+*/
+
+uint32
+my_convert(char *to, uint32 to_length, const CHARSET_INFO *to_cs,
+           const char *from, uint32 from_length,
+           const CHARSET_INFO *from_cs, uint *errors)
+{
+  uint32 length, length2;  /* Fast-path bytes left to copy / fast-path total */
+  /*
+    If any of the character sets is not ASCII compatible,
+    immediately switch to slow mb_wc->wc_mb method.
+  */
+  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+    return my_convert_internal(to, to_length, to_cs,
+                               from, from_length, from_cs, errors);
+
+  length= length2= MY_MIN(to_length, from_length);
+
+#if defined(__i386__)
+  /*
+    Special loop for i386: it can refer to a non-aligned
+    memory block as UINT32, which makes it possible to
+    copy four bytes at once. This gives about 10%
+    performance improvement compared to the
+    byte-by-byte loop.
+  */
+  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+  {
+    if ((*(uint32*)from) & 0x80808080)  /* Some byte is above 0x7F */
+      break;
+    *((uint32*) to)= *((const uint32*) from);
+  }
+#endif /* __i386__ */
+
+  for (; ; *to++= *from++, length--)  /* Copies one ASCII byte per pass */
+  {
+    if (!length)
+    {
+      *errors= 0;                    /* Only ASCII bytes were copied */
+      return length2;
+    }
+    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+    {
+      uint32 copied_length= length2 - length;  /* Bytes copied so far */
+      to_length-= copied_length;
+      from_length-= copied_length;
+      return copied_length + my_convert_internal(to, to_length, to_cs,
+                                                 from, from_length, from_cs,
+                                                 errors);
+    }
+  }
+
+  DBUG_ASSERT(FALSE); // Should never get to here
+  return 0;           // Make compiler happy
+}